/*************************************************************************
ALGLIB 3.17.0 (source code generated 2020-12-27)
Copyright (c) Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "stdafx.h"
#include "dataanalysis.h"

// disable some irrelevant warnings
#if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
#pragma warning(disable:4100)
#pragma warning(disable:4127)
#pragma warning(disable:4611)
#pragma warning(disable:4702)
#pragma warning(disable:4996)
#endif

/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
//
/////////////////////////////////////////////////////////////////////////
namespace alglib
{

#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)

#endif

#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Principal components analysis

This function builds an orthogonal basis where the first axis corresponds
to the direction of maximum variance, the second axis maximizes variance
in the subspace orthogonal to the first axis, and so on.

This function builds the FULL basis, i.e. returns N vectors corresponding
to ALL directions, no matter how informative. If you need just a few (say,
10 or 50) of the most important directions, you may find it faster to use
one of the reduced versions:
* pcatruncatedsubspace() - for the subspace iteration based method

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important improvements
! of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of ALGLIB.

INPUT PARAMETERS:
    X       -   dataset, array[0..NPoints-1,0..NVars-1];
                matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints -   dataset size, NPoints>=0
    NVars   -   number of independent variables, NVars>=1

OUTPUT PARAMETERS:
    Info    -   return code:
                * -4, if the SVD subroutine hasn't converged
                * -1, if wrong parameters have been passed (NPoints<0,
                      NVars<1)
                *  1, if the task is solved
    S2      -   array[0..NVars-1], variance values corresponding to
                basis vectors.
    V       -   array[0..NVars-1,0..NVars-1],
                matrix whose columns store basis vectors.

  -- ALGLIB --
     Copyright 25.08.2008 by Bochkanov Sergey
*************************************************************************/
void pcabuildbasis(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcabuildbasis(const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
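
/* -------------------------------------------------------------------------
   Illustrative usage sketch (not part of the library). Builds the full PCA
   basis for a tiny 3x2 dataset. The string constructor of real_2d_array is
   part of the public C++ interface; the call below assumes the default
   xparams argument declared in the public header:

       alglib::real_2d_array x = "[[2.5,2.4],[0.5,0.7],[2.2,2.9]]";
       alglib::ae_int_t info;
       alglib::real_1d_array s2;
       alglib::real_2d_array v;
       alglib::pcabuildbasis(x, 3, 2, info, s2, v);
       // On success info==1, s2 holds per-axis variances (non-increasing),
       // and the columns of the 2x2 matrix v are the basis vectors.
------------------------------------------------------------------------- */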

/*************************************************************************
Principal components analysis

This function performs truncated PCA, i.e. returns just a few of the most
important directions.

Internally it uses an iterative eigensolver which is very efficient when
only a minor fraction of the full basis is required. Thus, if you need the
full basis, it is better to use the pcabuildbasis() function.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important improvements
! of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of ALGLIB.

INPUT PARAMETERS:
    X       -   dataset, array[0..NPoints-1,0..NVars-1];
                matrix contains ONLY INDEPENDENT VARIABLES.
    NPoints -   dataset size, NPoints>=0
    NVars   -   number of independent variables, NVars>=1
    NNeeded -   number of requested components, in [1,NVars] range;
                this function is efficient only for NNeeded<<NVars.
    Eps     -   desired precision of vectors returned; the underlying
                solver will stop iterations as soon as the absolute error
                in the corresponding singular values reduces to roughly
                eps*MAX(lambda[]), with lambda[] being the array of
                eigenvalues.
                Zero value means that the algorithm performs the number of
                iterations specified by the maxits parameter, without
                paying attention to precision.
    MaxIts  -   number of iterations performed by the subspace iteration
                method. Zero value means that no limit on the iteration
                count is placed (the eps-based stopping condition is used).


OUTPUT PARAMETERS:
    S2      -   array[NNeeded], variance values corresponding to
                basis vectors.
    V       -   array[NVars,NNeeded],
                matrix whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in a small eps being selected as
the stopping condition. The exact value of the automatically selected eps
is version-dependent.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspace(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcatruncatedsubspace(const_cast<alglib_impl::ae_matrix*>(x.c_ptr()), npoints, nvars, nneeded, eps, maxits, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
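
/* -------------------------------------------------------------------------
   Illustrative usage sketch (not part of the library). Extracts only the
   single most important direction; eps=0 and maxits=0 request the
   automatically selected stopping condition described in the NOTE above:

       alglib::real_2d_array x = "[[1,0,0],[0,1,0],[0,0,1],[1,1,1]]";
       alglib::real_1d_array s2;
       alglib::real_2d_array v;
       alglib::pcatruncatedsubspace(x, 4, 3, 1, 0.0, 0, s2, v);
       // s2 is array[1]; v is 3x1, its column is the top principal direction.
------------------------------------------------------------------------- */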

/*************************************************************************
Sparse truncated principal components analysis

This function performs sparse truncated PCA, i.e. returns just a few of
the most important principal components for a sparse input X.

Internally it uses an iterative eigensolver which is very efficient when
only a minor fraction of the full basis is required.

It should be noted that, unlike LDA, PCA does not use class labels.

! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes the following important improvements
! of this function:
! * high-performance native backend with the same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
!   (C++ and C# versions, x86/x64 platform)
!
! We recommend that you read the 'Working with commercial version' section
! of the ALGLIB Reference Manual in order to find out how to use the
! performance-related features provided by the commercial edition of ALGLIB.

INPUT PARAMETERS:
    X       -   sparse dataset, sparse npoints*nvars matrix. It is
                recommended to use CRS sparse storage format; non-CRS
                input will be internally converted to CRS.
                The matrix contains ONLY INDEPENDENT VARIABLES, and must
                be EXACTLY npoints*nvars.
    NPoints -   dataset size, NPoints>=0
    NVars   -   number of independent variables, NVars>=1
    NNeeded -   number of requested components, in [1,NVars] range;
                this function is efficient only for NNeeded<<NVars.
    Eps     -   desired precision of vectors returned; the underlying
                solver will stop iterations as soon as the absolute error
                in the corresponding singular values reduces to roughly
                eps*MAX(lambda[]), with lambda[] being the array of
                eigenvalues.
                Zero value means that the algorithm performs the number of
                iterations specified by the maxits parameter, without
                paying attention to precision.
    MaxIts  -   number of iterations performed by the subspace iteration
                method. Zero value means that no limit on the iteration
                count is placed (the eps-based stopping condition is used).


OUTPUT PARAMETERS:
    S2      -   array[NNeeded], variance values corresponding to
                basis vectors.
    V       -   array[NVars,NNeeded],
                matrix whose columns store basis vectors.

NOTE: passing eps=0 and maxits=0 results in a small eps being selected as
a stopping condition. The exact value of the automatically selected eps is
version-dependent.

NOTE: zero MaxIts is silently replaced by some reasonable value which
prevents eternal loops (possible when inputs are degenerate and too
stringent stopping criteria are specified). In the current version it
is 50+2*NVars.

  -- ALGLIB --
     Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspacesparse(const sparsematrix &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::pcatruncatedsubspacesparse(const_cast<alglib_impl::sparsematrix*>(x.c_ptr()), npoints, nvars, nneeded, eps, maxits, const_cast<alglib_impl::ae_vector*>(s2.c_ptr()), const_cast<alglib_impl::ae_matrix*>(v.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
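
/* -------------------------------------------------------------------------
   Illustrative usage sketch (not part of the library). The sparse matrix is
   assembled with the sparsecreate/sparseset/sparseconverttocrs functions
   from the ALGLIB linear algebra unit, then passed to the solver; the CRS
   conversion follows the recommendation in the comment above:

       alglib::sparsematrix s;
       alglib::sparsecreate(4, 3, s);
       alglib::sparseset(s, 0, 0, 1.0);
       alglib::sparseset(s, 1, 1, 1.0);
       alglib::sparseset(s, 2, 2, 1.0);
       alglib::sparseset(s, 3, 0, 1.0);
       alglib::sparseconverttocrs(s);
       alglib::real_1d_array s2;
       alglib::real_2d_array v;
       alglib::pcatruncatedsubspacesparse(s, 4, 3, 1, 0.0, 0, s2, v);
------------------------------------------------------------------------- */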
#endif

#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Optimal binary classification

The algorithm finds the optimal (i.e. with minimal cross-entropy) binary
partition. Internal subroutine.

INPUT PARAMETERS:
    A       -   array[0..N-1], variable
    C       -   array[0..N-1], class numbers (0 or 1).
    N       -   array size

OUTPUT PARAMETERS:
    Info    -   completion code:
                * -3, all values of A[] are the same (partition is impossible)
                * -2, one of C[] is incorrect (<0, >1)
                * -1, incorrect parameters were passed (N<=0).
                *  1, OK
    Threshold-  partition boundary. The left part contains values which
                are strictly less than Threshold. The right part contains
                values which are greater than or equal to Threshold.
    PAL, PBL-   probabilities P(0|v<Threshold) and P(1|v<Threshold)
    PAR, PBR-   probabilities P(0|v>=Threshold) and P(1|v>=Threshold)
    CVE     -   cross-validation estimate of cross-entropy

  -- ALGLIB --
     Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2(const real_1d_array &a, const integer_1d_array &c, const ae_int_t n, ae_int_t &info, double &threshold, double &pal, double &pbl, double &par, double &pbr, double &cve, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dsoptimalsplit2(const_cast<alglib_impl::ae_vector*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(c.c_ptr()), n, &info, &threshold, &pal, &pbl, &par, &pbr, &cve, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
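
/* -------------------------------------------------------------------------
   Illustrative usage sketch (not part of the library). Splits a perfectly
   separable two-class sample; per the comment above, values strictly below
   the returned threshold form the left part:

       alglib::real_1d_array a = "[0.1,0.2,0.8,0.9]";
       alglib::integer_1d_array c = "[0,0,1,1]";
       alglib::ae_int_t info;
       double threshold, pal, pbl, par, pbr, cve;
       alglib::dsoptimalsplit2(a, c, 4, info, threshold, pal, pbl, par, pbr, cve);
       // info==1; pal ~ P(class 0 | v<threshold), pbr ~ P(class 1 | v>=threshold).
------------------------------------------------------------------------- */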

/*************************************************************************
Optimal partition, internal subroutine. Fast version.

Accepts:
    A       array[0..N-1]       array of attributes
    C       array[0..N-1]       array of class labels
    TiesBuf array[0..N]         temporaries (ties)
    CntBuf  array[0..2*NC-1]    temporaries (counts)
    Alpha   centering factor (0<=alpha<=1, recommended value - 0.05)
    BufR    array[0..N-1]       temporaries
    BufI    array[0..N-1]       temporaries

Output:
    Info        error code (">0"=OK, "<0"=bad)
    Threshold   partition boundary
    RMS         training set RMS error
    CVRMS       leave-one-out RMS error

Note:
    the contents of all arrays are changed by the subroutine;
    it doesn't allocate temporaries.

  -- ALGLIB --
     Copyright 11.12.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2fast(real_1d_array &a, integer_1d_array &c, integer_1d_array &tiesbuf, integer_1d_array &cntbuf, real_1d_array &bufr, integer_1d_array &bufi, const ae_int_t n, const ae_int_t nc, const double alpha, ae_int_t &info, double &threshold, double &rms, double &cvrms, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::dsoptimalsplit2fast(const_cast<alglib_impl::ae_vector*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(tiesbuf.c_ptr()), const_cast<alglib_impl::ae_vector*>(cntbuf.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufr.c_ptr()), const_cast<alglib_impl::ae_vector*>(bufi.c_ptr()), n, nc, alpha, &info, &threshold, &rms, &cvrms, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
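
/* -------------------------------------------------------------------------
   Illustrative usage sketch (not part of the library). Since the fast
   version allocates nothing, the caller pre-sizes every temporary exactly
   as the comment above requires (TiesBuf: N+1 elements, CntBuf: 2*NC):

       alglib::ae_int_t n = 4, nc = 2;
       alglib::real_1d_array a = "[0.1,0.2,0.8,0.9]", bufr;
       alglib::integer_1d_array c = "[0,0,1,1]", tiesbuf, cntbuf, bufi;
       tiesbuf.setlength(n+1);
       cntbuf.setlength(2*nc);
       bufr.setlength(n);
       bufi.setlength(n);
       alglib::ae_int_t info;
       double threshold, rms, cvrms;
       alglib::dsoptimalsplit2fast(a, c, tiesbuf, cntbuf, bufr, bufi, n, nc,
           0.05, info, threshold, rms, cvrms);
------------------------------------------------------------------------- */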
#endif

#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Model's errors:
    * RelCLSError  -  fraction of misclassified cases.
    * AvgCE        -  average cross-entropy
    * RMSError     -  root-mean-square error
    * AvgError     -  average error
    * AvgRelError  -  average relative error

NOTE 1: RelCLSError/AvgCE are zero on regression problems.

NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
errors in prediction of posterior probabilities.
*************************************************************************/
_modelerrors_owner::_modelerrors_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_modelerrors_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::modelerrors*)alglib_impl::ae_malloc(sizeof(alglib_impl::modelerrors), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_modelerrors_owner::_modelerrors_owner(const _modelerrors_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_modelerrors_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: modelerrors copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::modelerrors*)alglib_impl::ae_malloc(sizeof(alglib_impl::modelerrors), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init_copy(p_struct, const_cast<alglib_impl::modelerrors*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_modelerrors_owner& _modelerrors_owner::operator=(const _modelerrors_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: modelerrors assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: modelerrors assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_modelerrors_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::modelerrors));
    alglib_impl::_modelerrors_init_copy(p_struct, const_cast<alglib_impl::modelerrors*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_modelerrors_owner::~_modelerrors_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_modelerrors_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::modelerrors* _modelerrors_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::modelerrors* _modelerrors_owner::c_ptr() const
{
    return const_cast<alglib_impl::modelerrors*>(p_struct);
}
modelerrors::modelerrors() : _modelerrors_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

modelerrors::modelerrors(const modelerrors &rhs):_modelerrors_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
{
}

modelerrors& modelerrors::operator=(const modelerrors &rhs)
{
    if( this==&rhs )
        return *this;
    _modelerrors_owner::operator=(rhs);
    return *this;
}

modelerrors::~modelerrors()
{
}


/*************************************************************************

*************************************************************************/
_multilayerperceptron_owner::_multilayerperceptron_owner()
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_multilayerperceptron_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    p_struct = (alglib_impl::multilayerperceptron*)alglib_impl::ae_malloc(sizeof(alglib_impl::multilayerperceptron), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init(p_struct, &_state, ae_false);
    ae_state_clear(&_state);
}

_multilayerperceptron_owner::_multilayerperceptron_owner(const _multilayerperceptron_owner &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        if( p_struct!=NULL )
        {
            alglib_impl::_multilayerperceptron_destroy(p_struct);
            alglib_impl::ae_free(p_struct);
        }
        p_struct = NULL;
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    p_struct = NULL;
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: multilayerperceptron copy constructor failure (source is not initialized)", &_state);
    p_struct = (alglib_impl::multilayerperceptron*)alglib_impl::ae_malloc(sizeof(alglib_impl::multilayerperceptron), &_state);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init_copy(p_struct, const_cast<alglib_impl::multilayerperceptron*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
}

_multilayerperceptron_owner& _multilayerperceptron_owner::operator=(const _multilayerperceptron_owner &rhs)
{
    if( this==&rhs )
        return *this;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: multilayerperceptron assignment constructor failure (destination is not initialized)", &_state);
    alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: multilayerperceptron assignment constructor failure (source is not initialized)", &_state);
    alglib_impl::_multilayerperceptron_destroy(p_struct);
    memset(p_struct, 0, sizeof(alglib_impl::multilayerperceptron));
    alglib_impl::_multilayerperceptron_init_copy(p_struct, const_cast<alglib_impl::multilayerperceptron*>(rhs.p_struct), &_state, ae_false);
    ae_state_clear(&_state);
    return *this;
}

_multilayerperceptron_owner::~_multilayerperceptron_owner()
{
    if( p_struct!=NULL )
    {
        alglib_impl::_multilayerperceptron_destroy(p_struct);
        ae_free(p_struct);
    }
}

alglib_impl::multilayerperceptron* _multilayerperceptron_owner::c_ptr()
{
    return p_struct;
}

alglib_impl::multilayerperceptron* _multilayerperceptron_owner::c_ptr() const
{
    return const_cast<alglib_impl::multilayerperceptron*>(p_struct);
}
multilayerperceptron::multilayerperceptron() : _multilayerperceptron_owner()
{
}

multilayerperceptron::multilayerperceptron(const multilayerperceptron &rhs):_multilayerperceptron_owner(rhs)
{
}

multilayerperceptron& multilayerperceptron::operator=(const multilayerperceptron &rhs)
{
    if( this==&rhs )
        return *this;
    _multilayerperceptron_owner::operator=(rhs);
    return *this;
}

multilayerperceptron::~multilayerperceptron()
{
}

/*************************************************************************
This function serializes the data structure to a string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although the serializer uses spaces and CR+LF as separators, you can
  replace any separator character by an arbitrary combination of spaces,
  tabs, Windows or Unix newlines. This allows flexible reformatting of
  the string in case you want to include it into a text or XML file.
  But you should not insert separators into the middle of the "words",
  nor should you change the case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little-
  and big-endian machines, and so on. You can serialize the structure on
  a 32-bit machine and unserialize it on a 64-bit one (or vice versa), or
  serialize it on SPARC and unserialize it on x86. You can also serialize
  it in the C++ version of ALGLIB and unserialize it in the C# one, and
  vice versa.
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::string &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;
    alglib_impl::ae_int_t ssize;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpalloc(&serializer, obj.c_ptr(), &state);
    ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
    s_out.clear();
    s_out.reserve((size_t)(ssize+1));
    alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
    alglib_impl::mlpserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes the data structure from a string.
*************************************************************************/
void mlpunserialize(const std::string &s_in, multilayerperceptron &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
    alglib_impl::mlpunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
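
/* -------------------------------------------------------------------------
   Illustrative round-trip sketch (not part of the library). The serialized
   string is portable across platforms and language versions, as described
   above; mlpcreate1 is defined later in this file:

       alglib::multilayerperceptron net, restored;
       alglib::mlpcreate1(2, 5, 1, net);
       std::string s;
       alglib::mlpserialize(net, s);
       alglib::mlpunserialize(s, restored);
       // restored is now an exact copy of net.
------------------------------------------------------------------------- */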


/*************************************************************************
This function serializes the data structure to a C++ stream.

The data stream generated by this function is the same as the string
representation generated by the string version of the serializer -
alphanumeric characters, dots, underscores, minus signs, which are
grouped into words separated by spaces and CR+LF.

We recommend that you read the comments on the string version of the
serializer to find out more about the serialization of ALGLIB objects.
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::ostream &s_out)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_alloc_start(&serializer);
    alglib_impl::mlpalloc(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
    alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
    alglib_impl::mlpserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
/*************************************************************************
This function unserializes the data structure from a stream.
*************************************************************************/
void mlpunserialize(const std::istream &s_in, multilayerperceptron &obj)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state state;
    alglib_impl::ae_serializer serializer;

    alglib_impl::ae_state_init(&state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&state, &_break_jump);
    alglib_impl::ae_serializer_init(&serializer);
    alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
    alglib_impl::mlpunserialize(&serializer, obj.c_ptr(), &state);
    alglib_impl::ae_serializer_stop(&serializer, &state);
    alglib_impl::ae_serializer_clear(&serializer);
    alglib_impl::ae_state_clear(&state);
}
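
/* -------------------------------------------------------------------------
   Illustrative stream round-trip sketch (not part of the library), using a
   std::stringstream (requires <sstream>); the stream payload is identical
   to the string form described above:

       alglib::multilayerperceptron net, restored;
       alglib::mlpcreate0(3, 2, net);
       std::stringstream ss;
       alglib::mlpserialize(net, ss);
       alglib::mlpunserialize(ss, restored);
------------------------------------------------------------------------- */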

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with linear output layer. Network weights are filled with small
random values.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate0(nin, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreate0, but with one hidden layer (NHid neurons) with
non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate1(nin, nhid, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons)
with non-linear activation function. Output layer is linear.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreate2(nin, nhid1, nhid2, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
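
/* -------------------------------------------------------------------------
   Illustrative sketch (not part of the library) contrasting the three
   regression-network constructors defined above:

       alglib::multilayerperceptron net0, net1, net2;
       alglib::mlpcreate0(4, 2, net0);        // no hidden layers (linear model)
       alglib::mlpcreate1(4, 10, 2, net1);    // one hidden layer, 10 neurons
       alglib::mlpcreate2(4, 10, 5, 2, net2); // two hidden layers, 10 and 5 neurons
------------------------------------------------------------------------- */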

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with non-linear output layer. Network weights are filled with
small random values.

Activation function of the output layer takes values:

    (B, +INF), if D>=0

or

    (-INF, B), if D<0.


  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb0(nin, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateB0 but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb1(nin, nhid, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateB0 but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreateb2(nin, nhid1, nhid2, nout, b, d, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
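
/* -------------------------------------------------------------------------
   Illustrative sketch (not part of the library). With B=0 and D=1 (D>=0),
   the output activation is constrained to (0,+INF), per the range rule
   stated for MLPCreateB0 above:

       alglib::multilayerperceptron net;
       alglib::mlpcreateb1(3, 8, 1, 0.0, 1.0, net);
------------------------------------------------------------------------- */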

/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with non-linear output layer. Network weights are filled with
small random values. Activation function of the output layer takes values
[A,B].

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater0(nin, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateR0, but with non-linear hidden layer.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater1(nin, nhid, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateR0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreater2(nin, nhid1, nhid2, nout, a, b, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
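
/* -------------------------------------------------------------------------
   Illustrative sketch (not part of the library). The R-family constrains
   outputs to the closed interval [A,B], here [-1,+1]:

       alglib::multilayerperceptron net;
       alglib::mlpcreater1(3, 8, 1, -1.0, +1.0, net);
------------------------------------------------------------------------- */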

/*************************************************************************
Creates classifier network with NIn inputs and NOut possible classes.
Network contains no hidden layers and a linear output layer with SOFTMAX-
normalization (so outputs sum up to 1.0 and converge to posterior
probabilities).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec0(nin, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateC0, but with one non-linear hidden layer.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec1(nin, nhid, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Same as MLPCreateC0, but with two non-linear hidden layers.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcreatec2(nin, nhid1, nhid2, nout, const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
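
/* -------------------------------------------------------------------------
   Illustrative sketch (not part of the library). A classifier for 3 classes
   whose SOFTMAX-normalized outputs sum to 1.0, as described for
   MLPCreateC0 above:

       alglib::multilayerperceptron net;
       alglib::mlpcreatec1(4, 10, 3, net);
------------------------------------------------------------------------- */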

/*************************************************************************
Copying of neural network

INPUT PARAMETERS:
    Network1 -  original

OUTPUT PARAMETERS:
    Network2 -  copy

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcopy(const multilayerperceptron &network1, multilayerperceptron &network2, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcopy(const_cast<alglib_impl::multilayerperceptron*>(network1.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network2.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
This function copies tunable parameters (weights/means/sigmas) from one
network to another with the same architecture. It performs some rudimentary
checks that the architectures are the same, and throws an exception if the
check fails.

It is intended for fast copying of states between two networks which are
known to have the same geometry.

INPUT PARAMETERS:
    Network1 -  source, must be correctly initialized
    Network2 -  target, must have the same architecture

OUTPUT PARAMETERS:
    Network2 -  network state is copied from source to target

  -- ALGLIB --
     Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpcopytunableparameters(const multilayerperceptron &network1, const multilayerperceptron &network2, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpcopytunableparameters(const_cast<alglib_impl::multilayerperceptron*>(network1.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network2.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Randomization of neural network weights

  -- ALGLIB --
     Copyright 06.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlprandomize(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlprandomize(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Randomization of neural network weights and standardizer

  -- ALGLIB --
     Copyright 10.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlprandomizefull(const multilayerperceptron &network, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlprandomizefull(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Internal subroutine.

  -- ALGLIB --
     Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpinitpreprocessor(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpinitpreprocessor(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}

/*************************************************************************
Returns information about initialized network: number of inputs, outputs,
weights.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpproperties(const multilayerperceptron &network, ae_int_t &nin, ae_int_t &nout, ae_int_t &wcount, const xparams _xparams)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _alglib_env_state;
    alglib_impl::ae_state_init(&_alglib_env_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
        return;
#endif
    }
    ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
    if( _xparams.flags!=0x0 )
        ae_state_set_flags(&_alglib_env_state, _xparams.flags);
    alglib_impl::mlpproperties(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &nin, &nout, &wcount, &_alglib_env_state);
    alglib_impl::ae_state_clear(&_alglib_env_state);
    return;
}
1395 :
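/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): querying basic
network properties with mlpproperties(). A minimal sketch, assuming the
mlpcreate1() constructor from the same mlpbase unit and the default
xparams arguments declared in dataanalysis.h.
*************************************************************************/
#if 0
static void example_mlpproperties()
{
    multilayerperceptron net;
    mlpcreate1(2, 5, 1, net);              // 2 inputs, 5 hidden neurons, 1 output
    ae_int_t nin, nout, wcount;
    mlpproperties(net, nin, nout, wcount); // nin=2, nout=1, wcount = total weight count
    printf("nin=%d nout=%d wcount=%d\n", (int)nin, (int)nout, (int)wcount);
}
#endif
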
1396 : /*************************************************************************
1397 : Returns number of inputs.
1398 :
1399 : -- ALGLIB --
1400 : Copyright 19.10.2011 by Bochkanov Sergey
1401 : *************************************************************************/
1402 0 : ae_int_t mlpgetinputscount(const multilayerperceptron &network, const xparams _xparams)
1403 : {
1404 : jmp_buf _break_jump;
1405 : alglib_impl::ae_state _alglib_env_state;
1406 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1407 0 : if( setjmp(_break_jump) )
1408 : {
1409 : #if !defined(AE_NO_EXCEPTIONS)
1410 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1411 : #else
1412 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1413 : return 0;
1414 : #endif
1415 : }
1416 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1417 0 : if( _xparams.flags!=0x0 )
1418 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1419 0 : alglib_impl::ae_int_t result = alglib_impl::mlpgetinputscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
1420 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1421 0 : return *(reinterpret_cast<ae_int_t*>(&result));
1422 : }
1423 :
1424 : /*************************************************************************
1425 : Returns number of outputs.
1426 :
1427 : -- ALGLIB --
1428 : Copyright 19.10.2011 by Bochkanov Sergey
1429 : *************************************************************************/
1430 0 : ae_int_t mlpgetoutputscount(const multilayerperceptron &network, const xparams _xparams)
1431 : {
1432 : jmp_buf _break_jump;
1433 : alglib_impl::ae_state _alglib_env_state;
1434 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1435 0 : if( setjmp(_break_jump) )
1436 : {
1437 : #if !defined(AE_NO_EXCEPTIONS)
1438 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1439 : #else
1440 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1441 : return 0;
1442 : #endif
1443 : }
1444 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1445 0 : if( _xparams.flags!=0x0 )
1446 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1447 0 : alglib_impl::ae_int_t result = alglib_impl::mlpgetoutputscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
1448 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1449 0 : return *(reinterpret_cast<ae_int_t*>(&result));
1450 : }
1451 :
1452 : /*************************************************************************
1453 : Returns number of weights.
1454 :
1455 : -- ALGLIB --
1456 : Copyright 19.10.2011 by Bochkanov Sergey
1457 : *************************************************************************/
1458 0 : ae_int_t mlpgetweightscount(const multilayerperceptron &network, const xparams _xparams)
1459 : {
1460 : jmp_buf _break_jump;
1461 : alglib_impl::ae_state _alglib_env_state;
1462 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1463 0 : if( setjmp(_break_jump) )
1464 : {
1465 : #if !defined(AE_NO_EXCEPTIONS)
1466 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1467 : #else
1468 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1469 : return 0;
1470 : #endif
1471 : }
1472 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1473 0 : if( _xparams.flags!=0x0 )
1474 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1475 0 : alglib_impl::ae_int_t result = alglib_impl::mlpgetweightscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
1476 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1477 0 : return *(reinterpret_cast<ae_int_t*>(&result));
1478 : }
1479 :
1480 : /*************************************************************************
1481 : Tells whether network is SOFTMAX-normalized (i.e. classifier) or not.
1482 :
1483 : -- ALGLIB --
1484 : Copyright 04.11.2007 by Bochkanov Sergey
1485 : *************************************************************************/
1486 0 : bool mlpissoftmax(const multilayerperceptron &network, const xparams _xparams)
1487 : {
1488 : jmp_buf _break_jump;
1489 : alglib_impl::ae_state _alglib_env_state;
1490 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1491 0 : if( setjmp(_break_jump) )
1492 : {
1493 : #if !defined(AE_NO_EXCEPTIONS)
1494 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1495 : #else
1496 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1497 : return 0;
1498 : #endif
1499 : }
1500 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1501 0 : if( _xparams.flags!=0x0 )
1502 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1503 0 : ae_bool result = alglib_impl::mlpissoftmax(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
1504 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1505 0 : return *(reinterpret_cast<bool*>(&result));
1506 : }
1507 :
1508 : /*************************************************************************
1509 : This function returns total number of layers (including input, hidden and
1510 : output layers).
1511 :
1512 : -- ALGLIB --
1513 : Copyright 25.03.2011 by Bochkanov Sergey
1514 : *************************************************************************/
1515 0 : ae_int_t mlpgetlayerscount(const multilayerperceptron &network, const xparams _xparams)
1516 : {
1517 : jmp_buf _break_jump;
1518 : alglib_impl::ae_state _alglib_env_state;
1519 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1520 0 : if( setjmp(_break_jump) )
1521 : {
1522 : #if !defined(AE_NO_EXCEPTIONS)
1523 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1524 : #else
1525 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1526 : return 0;
1527 : #endif
1528 : }
1529 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1530 0 : if( _xparams.flags!=0x0 )
1531 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1532 0 : alglib_impl::ae_int_t result = alglib_impl::mlpgetlayerscount(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
1533 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1534 0 : return *(reinterpret_cast<ae_int_t*>(&result));
1535 : }
1536 :
1537 : /*************************************************************************
1538 : This function returns size of K-th layer.
1539 :
1540 : K=0 corresponds to input layer, K=CNT-1 corresponds to output layer.
1541 :
1542 : Size of the output layer is always equal to the number of outputs; note
1543 : that for a softmax-normalized network the last neuron of the output layer
1544 : has no incoming connections - its value is fixed at zero.
1545 :
1546 : -- ALGLIB --
1547 : Copyright 25.03.2011 by Bochkanov Sergey
1548 : *************************************************************************/
1549 0 : ae_int_t mlpgetlayersize(const multilayerperceptron &network, const ae_int_t k, const xparams _xparams)
1550 : {
1551 : jmp_buf _break_jump;
1552 : alglib_impl::ae_state _alglib_env_state;
1553 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1554 0 : if( setjmp(_break_jump) )
1555 : {
1556 : #if !defined(AE_NO_EXCEPTIONS)
1557 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1558 : #else
1559 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1560 : return 0;
1561 : #endif
1562 : }
1563 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1564 0 : if( _xparams.flags!=0x0 )
1565 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1566 0 : alglib_impl::ae_int_t result = alglib_impl::mlpgetlayersize(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, &_alglib_env_state);
1567 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1568 0 : return *(reinterpret_cast<ae_int_t*>(&result));
1569 : }
1570 :
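/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): walking the layer
structure with mlpgetlayerscount()/mlpgetlayersize(). A minimal sketch,
assuming the mlpcreate2() constructor from the same mlpbase unit.
*************************************************************************/
#if 0
static void example_mlplayers()
{
    multilayerperceptron net;
    mlpcreate2(3, 8, 4, 2, net);           // input(3) -> hidden(8) -> hidden(4) -> output(2)
    ae_int_t cnt = mlpgetlayerscount(net); // counts input, hidden and output layers
    for(ae_int_t k=0; k<cnt; k++)
        printf("layer %d: %d neurons\n", (int)k, (int)mlpgetlayersize(net, k));
}
#endif
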
1571 : /*************************************************************************
1572 : This function returns offset/scaling coefficients for I-th input of the
1573 : network.
1574 :
1575 : INPUT PARAMETERS:
1576 : Network - network
1577 : I - input index
1578 :
1579 : OUTPUT PARAMETERS:
1580 : Mean - mean term
1581 : Sigma - sigma term, guaranteed to be nonzero.
1582 :
1583 : I-th input is passed through linear transformation
1584 : IN[i] = (IN[i]-Mean)/Sigma
1585 : before feeding to the network
1586 :
1587 : -- ALGLIB --
1588 : Copyright 25.03.2011 by Bochkanov Sergey
1589 : *************************************************************************/
1590 0 : void mlpgetinputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams)
1591 : {
1592 : jmp_buf _break_jump;
1593 : alglib_impl::ae_state _alglib_env_state;
1594 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1595 0 : if( setjmp(_break_jump) )
1596 : {
1597 : #if !defined(AE_NO_EXCEPTIONS)
1598 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1599 : #else
1600 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1601 : return;
1602 : #endif
1603 : }
1604 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1605 0 : if( _xparams.flags!=0x0 )
1606 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1607 0 : alglib_impl::mlpgetinputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, &mean, &sigma, &_alglib_env_state);
1608 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1609 0 : return;
1610 : }
1611 :
1612 : /*************************************************************************
1613 : This function returns offset/scaling coefficients for I-th output of the
1614 : network.
1615 :
1616 : INPUT PARAMETERS:
1617 : Network - network
1618 : I - output index
1619 :
1620 : OUTPUT PARAMETERS:
1621 : Mean - mean term
1622 : Sigma - sigma term, guaranteed to be nonzero.
1623 :
1624 : I-th output is passed through linear transformation
1625 : OUT[i] = OUT[i]*Sigma+Mean
1626 : before returning it to the user. For a SOFTMAX-normalized network we
1627 : always return (Mean,Sigma)=(0.0,1.0).
1628 :
1629 : -- ALGLIB --
1630 : Copyright 25.03.2011 by Bochkanov Sergey
1631 : *************************************************************************/
1632 0 : void mlpgetoutputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams)
1633 : {
1634 : jmp_buf _break_jump;
1635 : alglib_impl::ae_state _alglib_env_state;
1636 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1637 0 : if( setjmp(_break_jump) )
1638 : {
1639 : #if !defined(AE_NO_EXCEPTIONS)
1640 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1641 : #else
1642 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1643 : return;
1644 : #endif
1645 : }
1646 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1647 0 : if( _xparams.flags!=0x0 )
1648 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1649 0 : alglib_impl::mlpgetoutputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, &mean, &sigma, &_alglib_env_state);
1650 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1651 0 : return;
1652 : }
1653 :
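/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): reading the
input/output scaling coefficients documented above. A minimal sketch,
assuming mlpcreate1() from the same mlpbase unit.
*************************************************************************/
#if 0
static void example_mlpscaling()
{
    multilayerperceptron net;
    mlpcreate1(2, 3, 1, net);
    double mean, sigma;
    mlpgetinputscaling(net, 0, mean, sigma);   // IN[0] = (IN[0]-Mean)/Sigma
    printf("input 0:  mean=%.3f sigma=%.3f\n", mean, sigma);
    mlpgetoutputscaling(net, 0, mean, sigma);  // OUT[0] = OUT[0]*Sigma+Mean
    printf("output 0: mean=%.3f sigma=%.3f\n", mean, sigma);
}
#endif
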
1654 : /*************************************************************************
1655 : This function returns information about Ith neuron of Kth layer
1656 :
1657 : INPUT PARAMETERS:
1658 : Network - network
1659 : K - layer index
1660 : I - neuron index (within layer)
1661 :
1662 : OUTPUT PARAMETERS:
1663 : FKind - activation function type (used by MLPActivationFunction())
1664 : this value is zero for input or linear neurons
1665 : Threshold - also called offset, bias
1666 : zero for input neurons
1667 :
1668 : NOTE: this function throws an exception if the layer or neuron with the
1669 : given index does not exist.
1670 :
1671 : -- ALGLIB --
1672 : Copyright 25.03.2011 by Bochkanov Sergey
1673 : *************************************************************************/
1674 0 : void mlpgetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, ae_int_t &fkind, double &threshold, const xparams _xparams)
1675 : {
1676 : jmp_buf _break_jump;
1677 : alglib_impl::ae_state _alglib_env_state;
1678 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1679 0 : if( setjmp(_break_jump) )
1680 : {
1681 : #if !defined(AE_NO_EXCEPTIONS)
1682 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1683 : #else
1684 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1685 : return;
1686 : #endif
1687 : }
1688 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1689 0 : if( _xparams.flags!=0x0 )
1690 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1691 0 : alglib_impl::mlpgetneuroninfo(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, i, &fkind, &threshold, &_alglib_env_state);
1692 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1693 0 : return;
1694 : }
1695 :
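/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): inspecting a
neuron with mlpgetneuroninfo(). A minimal sketch, assuming mlpcreate1()
from the same mlpbase unit; layer 1 is the first hidden layer, per the
layer indexing described above.
*************************************************************************/
#if 0
static void example_mlpneuroninfo()
{
    multilayerperceptron net;
    mlpcreate1(2, 3, 1, net);
    ae_int_t fkind;
    double threshold;
    mlpgetneuroninfo(net, 1, 0, fkind, threshold); // neuron 0 of the hidden layer
    printf("fkind=%d threshold=%.6f\n", (int)fkind, threshold);
}
#endif
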
1696 : /*************************************************************************
1697 : This function returns information about connection from I0-th neuron of
1698 : K0-th layer to I1-th neuron of K1-th layer.
1699 :
1700 : INPUT PARAMETERS:
1701 : Network - network
1702 : K0 - layer index
1703 : I0 - neuron index (within layer)
1704 : K1 - layer index
1705 : I1 - neuron index (within layer)
1706 :
1707 : RESULT:
1708 : connection weight (zero for non-existent connections)
1709 :
1710 : This function:
1711 : 1. throws an exception if the layer/neuron with given index does not exist.
1712 : 2. returns zero if neurons exist, but there is no connection between them
1713 :
1714 : -- ALGLIB --
1715 : Copyright 25.03.2011 by Bochkanov Sergey
1716 : *************************************************************************/
1717 0 : double mlpgetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const xparams _xparams)
1718 : {
1719 : jmp_buf _break_jump;
1720 : alglib_impl::ae_state _alglib_env_state;
1721 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1722 0 : if( setjmp(_break_jump) )
1723 : {
1724 : #if !defined(AE_NO_EXCEPTIONS)
1725 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1726 : #else
1727 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1728 : return 0;
1729 : #endif
1730 : }
1731 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1732 0 : if( _xparams.flags!=0x0 )
1733 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1734 0 : double result = alglib_impl::mlpgetweight(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k0, i0, k1, i1, &_alglib_env_state);
1735 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1736 0 : return *(reinterpret_cast<double*>(&result));
1737 : }
1738 :
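/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): reading a single
connection weight with mlpgetweight(). A minimal sketch, assuming
mlpcreate1() from the same mlpbase unit.
*************************************************************************/
#if 0
static void example_mlpgetweight()
{
    multilayerperceptron net;
    mlpcreate1(2, 3, 1, net);
    // weight of the connection: neuron 0 of layer 0 (input) -> neuron 1 of layer 1 (hidden)
    double w = mlpgetweight(net, 0, 0, 1, 1);
    printf("w=%.6f\n", w);
}
#endif
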
1739 : /*************************************************************************
1740 : This function sets offset/scaling coefficients for I-th input of the
1741 : network.
1742 :
1743 : INPUT PARAMETERS:
1744 : Network - network
1745 : I - input index
1746 : Mean - mean term
1747 : Sigma - sigma term (if zero, will be replaced by 1.0)
1748 :
1749 : NOTE: I-th input is passed through linear transformation
1750 : IN[i] = (IN[i]-Mean)/Sigma
1751 : before feeding to the network. This function sets Mean and Sigma.
1752 :
1753 : -- ALGLIB --
1754 : Copyright 25.03.2011 by Bochkanov Sergey
1755 : *************************************************************************/
1756 0 : void mlpsetinputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams)
1757 : {
1758 : jmp_buf _break_jump;
1759 : alglib_impl::ae_state _alglib_env_state;
1760 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1761 0 : if( setjmp(_break_jump) )
1762 : {
1763 : #if !defined(AE_NO_EXCEPTIONS)
1764 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1765 : #else
1766 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1767 : return;
1768 : #endif
1769 : }
1770 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1771 0 : if( _xparams.flags!=0x0 )
1772 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1773 0 : alglib_impl::mlpsetinputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, mean, sigma, &_alglib_env_state);
1774 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1775 0 : return;
1776 : }
1777 :
1778 : /*************************************************************************
1779 : This function sets offset/scaling coefficients for I-th output of the
1780 : network.
1781 :
1782 : INPUT PARAMETERS:
1783 : Network - network
1784 : I - output index
1785 : Mean - mean term
1786 : Sigma - sigma term (if zero, will be replaced by 1.0)
1787 :
1790 : NOTE: I-th output is passed through linear transformation
1791 : OUT[i] = OUT[i]*Sigma+Mean
1792 : before returning it to the user. This function sets Mean and Sigma. For a
1793 : SOFTMAX-normalized network you can not set (Mean,Sigma) to anything other
1794 : than (0.0,1.0) - this function will throw an exception otherwise.
1795 :
1796 : -- ALGLIB --
1797 : Copyright 25.03.2011 by Bochkanov Sergey
1798 : *************************************************************************/
1799 0 : void mlpsetoutputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams)
1800 : {
1801 : jmp_buf _break_jump;
1802 : alglib_impl::ae_state _alglib_env_state;
1803 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1804 0 : if( setjmp(_break_jump) )
1805 : {
1806 : #if !defined(AE_NO_EXCEPTIONS)
1807 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1808 : #else
1809 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1810 : return;
1811 : #endif
1812 : }
1813 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1814 0 : if( _xparams.flags!=0x0 )
1815 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1816 0 : alglib_impl::mlpsetoutputscaling(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), i, mean, sigma, &_alglib_env_state);
1817 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1818 0 : return;
1819 : }
1820 :
1821 : /*************************************************************************
1822 : This function modifies information about Ith neuron of Kth layer
1823 :
1824 : INPUT PARAMETERS:
1825 : Network - network
1826 : K - layer index
1827 : I - neuron index (within layer)
1828 : FKind - activation function type (used by MLPActivationFunction())
1829 : this value must be zero for input neurons
1830 : (you can not set activation function for input neurons)
1831 : Threshold - also called offset, bias
1832 : this value must be zero for input neurons
1833 : (you can not set threshold for input neurons)
1834 :
1835 : NOTES:
1836 : 1. this function throws an exception if the layer or neuron with the
1837 : given index does not exist.
1838 : 2. this function also throws an exception when you try to set a non-linear
1839 : activation function for input neurons (any kind of network) or for output
1840 : neurons of a classifier network.
1841 : 3. this function throws an exception when you try to set a non-zero
1842 : threshold for input neurons (any kind of network).
1843 :
1844 : -- ALGLIB --
1845 : Copyright 25.03.2011 by Bochkanov Sergey
1846 : *************************************************************************/
1847 0 : void mlpsetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, const ae_int_t fkind, const double threshold, const xparams _xparams)
1848 : {
1849 : jmp_buf _break_jump;
1850 : alglib_impl::ae_state _alglib_env_state;
1851 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1852 0 : if( setjmp(_break_jump) )
1853 : {
1854 : #if !defined(AE_NO_EXCEPTIONS)
1855 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1856 : #else
1857 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1858 : return;
1859 : #endif
1860 : }
1861 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1862 0 : if( _xparams.flags!=0x0 )
1863 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1864 0 : alglib_impl::mlpsetneuroninfo(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k, i, fkind, threshold, &_alglib_env_state);
1865 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1866 0 : return;
1867 : }
1868 :
1869 : /*************************************************************************
1870 : This function modifies information about connection from I0-th neuron of
1871 : K0-th layer to I1-th neuron of K1-th layer.
1872 :
1873 : INPUT PARAMETERS:
1874 : Network - network
1875 : K0 - layer index
1876 : I0 - neuron index (within layer)
1877 : K1 - layer index
1878 : I1 - neuron index (within layer)
1879 : W - connection weight (must be zero for non-existent
1880 : connections)
1881 :
1882 : This function:
1883 : 1. throws an exception if the layer/neuron with given index does not exist.
1884 : 2. throws an exception if you try to set a non-zero weight for a
1885 : non-existent connection
1886 :
1887 : -- ALGLIB --
1888 : Copyright 25.03.2011 by Bochkanov Sergey
1889 : *************************************************************************/
1890 0 : void mlpsetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const double w, const xparams _xparams)
1891 : {
1892 : jmp_buf _break_jump;
1893 : alglib_impl::ae_state _alglib_env_state;
1894 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1895 0 : if( setjmp(_break_jump) )
1896 : {
1897 : #if !defined(AE_NO_EXCEPTIONS)
1898 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1899 : #else
1900 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1901 : return;
1902 : #endif
1903 : }
1904 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1905 0 : if( _xparams.flags!=0x0 )
1906 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1907 0 : alglib_impl::mlpsetweight(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), k0, i0, k1, i1, w, &_alglib_env_state);
1908 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1909 0 : return;
1910 : }
1911 :
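/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): hand-tuning a
network with the setters above. A minimal sketch, assuming mlpcreate1()
from the same mlpbase unit; the activation kind is read back with
mlpgetneuroninfo() instead of assuming specific FKind codes.
*************************************************************************/
#if 0
static void example_mlpsetters()
{
    multilayerperceptron net;
    mlpcreate1(1, 2, 1, net);
    mlpsetinputscaling(net, 0, 0.5, 2.0); // IN[0] = (IN[0]-0.5)/2.0
    mlpsetweight(net, 0, 0, 1, 0, 0.25);  // input neuron 0 -> hidden neuron 0
    ae_int_t fkind;
    double threshold;
    mlpgetneuroninfo(net, 1, 0, fkind, threshold);
    mlpsetneuroninfo(net, 1, 0, fkind, -0.1); // keep activation, change the bias
}
#endif
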
1912 : /*************************************************************************
1913 : Neural network activation function
1914 :
1915 : INPUT PARAMETERS:
1916 : NET - neuron input
1917 : K - function index (zero for linear function)
1918 :
1919 : OUTPUT PARAMETERS:
1920 : F - function
1921 : DF - its derivative
1922 : D2F - its second derivative
1923 :
1924 : -- ALGLIB --
1925 : Copyright 04.11.2007 by Bochkanov Sergey
1926 : *************************************************************************/
1927 0 : void mlpactivationfunction(const double net, const ae_int_t k, double &f, double &df, double &d2f, const xparams _xparams)
1928 : {
1929 : jmp_buf _break_jump;
1930 : alglib_impl::ae_state _alglib_env_state;
1931 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1932 0 : if( setjmp(_break_jump) )
1933 : {
1934 : #if !defined(AE_NO_EXCEPTIONS)
1935 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1936 : #else
1937 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1938 : return;
1939 : #endif
1940 : }
1941 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1942 0 : if( _xparams.flags!=0x0 )
1943 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1944 0 : alglib_impl::mlpactivationfunction(net, k, &f, &df, &d2f, &_alglib_env_state);
1945 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1946 0 : return;
1947 : }
1948 :
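/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): evaluating an
activation function together with its derivatives. A minimal sketch; the
FKind index is taken from an existing hidden neuron via mlpgetneuroninfo()
rather than assuming the meaning of particular index values.
*************************************************************************/
#if 0
static void example_mlpactivation()
{
    multilayerperceptron net;
    mlpcreate1(1, 2, 1, net);
    ae_int_t fkind;
    double threshold, f, df, d2f;
    mlpgetneuroninfo(net, 1, 0, fkind, threshold);
    mlpactivationfunction(0.5, fkind, f, df, d2f); // F(0.5), F'(0.5), F''(0.5)
    printf("f=%.6f df=%.6f d2f=%.6f\n", f, df, d2f);
}
#endif
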
1949 : /*************************************************************************
1950 : Processing
1951 :
1952 : INPUT PARAMETERS:
1953 : Network - neural network
1954 : X - input vector, array[0..NIn-1].
1955 :
1956 : OUTPUT PARAMETERS:
1957 : Y - result. Regression estimate when solving regression task,
1958 : vector of posterior probabilities for classification task.
1959 :
1960 : See also MLPProcessI
1961 :
1962 : -- ALGLIB --
1963 : Copyright 04.11.2007 by Bochkanov Sergey
1964 : *************************************************************************/
1965 0 : void mlpprocess(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
1966 : {
1967 : jmp_buf _break_jump;
1968 : alglib_impl::ae_state _alglib_env_state;
1969 0 : alglib_impl::ae_state_init(&_alglib_env_state);
1970 0 : if( setjmp(_break_jump) )
1971 : {
1972 : #if !defined(AE_NO_EXCEPTIONS)
1973 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
1974 : #else
1975 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
1976 : return;
1977 : #endif
1978 : }
1979 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
1980 0 : if( _xparams.flags!=0x0 )
1981 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
1982 0 : alglib_impl::mlpprocess(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
1983 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
1984 0 : return;
1985 : }
1986 :
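/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): running a forward
pass with mlpprocess(). A minimal sketch, assuming mlpcreate1() and the
string constructors of real_1d_array; the network is randomized, not
trained, so the output is arbitrary.
*************************************************************************/
#if 0
static void example_mlpprocess()
{
    multilayerperceptron net;
    mlpcreate1(2, 5, 1, net);
    mlprandomize(net);                  // random weights - untrained network
    real_1d_array x = "[0.1, 0.9]";     // array[0..NIn-1]
    real_1d_array y;
    mlpprocess(net, x, y);              // y is resized to NOut
    printf("y[0]=%.6f\n", y[0]);        // mlpprocessi() is the allocating variant
}
#endif
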
1987 : /*************************************************************************
1988 : 'interactive' variant of MLPProcess for languages like Python, which
1989 : support constructs like "Y = MLPProcess(NN,X)" and an interactive mode of
1990 : the interpreter.
1991 :
1992 : This function allocates new array on each call, so it is significantly
1993 : slower than its 'non-interactive' counterpart, but it is more convenient
1994 : when you call it from command line.
1995 :
1996 : -- ALGLIB --
1997 : Copyright 21.09.2010 by Bochkanov Sergey
1998 : *************************************************************************/
1999 0 : void mlpprocessi(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
2000 : {
2001 : jmp_buf _break_jump;
2002 : alglib_impl::ae_state _alglib_env_state;
2003 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2004 0 : if( setjmp(_break_jump) )
2005 : {
2006 : #if !defined(AE_NO_EXCEPTIONS)
2007 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2008 : #else
2009 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2010 : return;
2011 : #endif
2012 : }
2013 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2014 0 : if( _xparams.flags!=0x0 )
2015 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2016 0 : alglib_impl::mlpprocessi(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
2017 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2018 0 : return;
2019 : }
2020 :
2021 : /*************************************************************************
2022 : Error of the neural network on dataset.
2023 :
2024 : ! COMMERCIAL EDITION OF ALGLIB:
2025 : !
2026 : ! Commercial Edition of ALGLIB includes following important improvements
2027 : ! of this function:
2028 : ! * high-performance native backend with same C# interface (C# version)
2029 : ! * multithreading support (C++ and C# versions)
2030 : !
2031 : ! We recommend you to read 'Working with commercial version' section of
2032 : ! ALGLIB Reference Manual in order to find out how to use performance-
2033 : ! related features provided by commercial edition of ALGLIB.
2034 :
2035 : INPUT PARAMETERS:
2036 : Network - neural network;
2037 : XY - training set, see below for information on the
2038 : training set format;
2039 : NPoints - points count.
2040 :
2041 : RESULT:
2042 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
2043 :
2044 : DATASET FORMAT:
2045 :
2046 : This function uses two different dataset formats - one for regression
2047 : networks, another one for classification networks.
2048 :
2049 : For regression networks with NIn inputs and NOut outputs following dataset
2050 : format is used:
2051 : * dataset is given by NPoints*(NIn+NOut) matrix
2052 : * each row corresponds to one example
2053 : * first NIn columns are inputs, next NOut columns are outputs
2054 :
2055 : For classification networks with NIn inputs and NClasses classes following
2056 : dataset format is used:
2057 : * dataset is given by NPoints*(NIn+1) matrix
2058 : * each row corresponds to one example
2059 : * first NIn columns are inputs, last column stores class number (from 0 to
2060 : NClasses-1).
2061 :
2062 : -- ALGLIB --
2063 : Copyright 04.11.2007 by Bochkanov Sergey
2064 : *************************************************************************/
2065 0 : double mlperror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2066 : {
2067 : jmp_buf _break_jump;
2068 : alglib_impl::ae_state _alglib_env_state;
2069 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2070 0 : if( setjmp(_break_jump) )
2071 : {
2072 : #if !defined(AE_NO_EXCEPTIONS)
2073 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2074 : #else
2075 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2076 : return 0;
2077 : #endif
2078 : }
2079 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2080 0 : if( _xparams.flags!=0x0 )
2081 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2082 0 : double result = alglib_impl::mlperror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2083 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2084 0 : return *(reinterpret_cast<double*>(&result));
2085 : }
2086 :
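/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): computing the
sum-of-squares error on a regression dataset in the format documented
above (NIn input columns followed by NOut output columns). A minimal
sketch, assuming mlpcreate1() and the string constructor of
real_2d_array.
*************************************************************************/
#if 0
static void example_mlperror()
{
    multilayerperceptron net;
    mlpcreate1(2, 5, 1, net);           // NIn=2, NOut=1
    // 4 rows of [in0, in1, target] - the XOR truth table
    real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
    double e = mlperror(net, xy, 4);    // SUM(sqr(y[i]-desired_y[i])/2)
    printf("error=%.6f\n", e);
}
#endif
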
2087 : /*************************************************************************
2088 : Error of the neural network on dataset given by sparse matrix.
2089 :
2090 : ! COMMERCIAL EDITION OF ALGLIB:
2091 : !
2092 : ! Commercial Edition of ALGLIB includes following important improvements
2093 : ! of this function:
2094 : ! * high-performance native backend with same C# interface (C# version)
2095 : ! * multithreading support (C++ and C# versions)
2096 : !
2097 : ! We recommend you to read 'Working with commercial version' section of
2098 : ! ALGLIB Reference Manual in order to find out how to use performance-
2099 : ! related features provided by commercial edition of ALGLIB.
2100 :
2101 : INPUT PARAMETERS:
2102 : Network - neural network
2103 : XY - training set, see below for information on the
2104 : training set format. This function checks correctness
2105 : of the dataset (no NANs/INFs, class numbers are
2106 : correct) and throws exception when incorrect dataset
2107 : is passed. Sparse matrix must use CRS format for
2108 : storage.
2109 : NPoints - points count, >=0
2110 :
2111 : RESULT:
2112 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
2113 :
2114 : DATASET FORMAT:
2115 :
2116 : This function uses two different dataset formats - one for regression
2117 : networks, another one for classification networks.
2118 :
2119 : For regression networks with NIn inputs and NOut outputs following dataset
2120 : format is used:
2121 : * dataset is given by NPoints*(NIn+NOut) matrix
2122 : * each row corresponds to one example
2123 : * first NIn columns are inputs, next NOut columns are outputs
2124 :
2125 : For classification networks with NIn inputs and NClasses classes following
2126 : dataset format is used:
2127 : * dataset is given by NPoints*(NIn+1) matrix
2128 : * each row corresponds to one example
2129 : * first NIn columns are inputs, last column stores class number (from 0 to
2130 : NClasses-1).
2131 :
2132 : -- ALGLIB --
2133 : Copyright 23.07.2012 by Bochkanov Sergey
2134 : *************************************************************************/
2135 0 : double mlperrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2136 : {
2137 : jmp_buf _break_jump;
2138 : alglib_impl::ae_state _alglib_env_state;
2139 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2140 0 : if( setjmp(_break_jump) )
2141 : {
2142 : #if !defined(AE_NO_EXCEPTIONS)
2143 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2144 : #else
2145 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2146 : return 0;
2147 : #endif
2148 : }
2149 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2150 0 : if( _xparams.flags!=0x0 )
2151 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2152 0 : double result = alglib_impl::mlperrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2153 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2154 0 : return *(reinterpret_cast<double*>(&result));
2155 : }
2156 :
2157 : /*************************************************************************
2158 : Natural error function for neural network, internal subroutine.
2159 :
2160 : NOTE: this function is single-threaded. Unlike the other error functions,
2161 : it receives no speed-up from being executed in SMP mode.
2162 :
2163 : -- ALGLIB --
2164 : Copyright 04.11.2007 by Bochkanov Sergey
2165 : *************************************************************************/
2166 0 : double mlperrorn(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
2167 : {
2168 : jmp_buf _break_jump;
2169 : alglib_impl::ae_state _alglib_env_state;
2170 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2171 0 : if( setjmp(_break_jump) )
2172 : {
2173 : #if !defined(AE_NO_EXCEPTIONS)
2174 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2175 : #else
2176 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2177 : return 0;
2178 : #endif
2179 : }
2180 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2181 0 : if( _xparams.flags!=0x0 )
2182 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2183 0 : double result = alglib_impl::mlperrorn(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
2184 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2185 0 : return *(reinterpret_cast<double*>(&result));
2186 : }
2187 :
2188 : /*************************************************************************
2189 : Classification error of the neural network on dataset.
2190 :
2191 : ! COMMERCIAL EDITION OF ALGLIB:
2192 : !
2193 : ! Commercial Edition of ALGLIB includes following important improvements
2194 : ! of this function:
2195 : ! * high-performance native backend with same C# interface (C# version)
2196 : ! * multithreading support (C++ and C# versions)
2197 : !
2198 : ! We recommend you to read 'Working with commercial version' section of
2199 : ! ALGLIB Reference Manual in order to find out how to use performance-
2200 : ! related features provided by commercial edition of ALGLIB.
2201 :
2202 : INPUT PARAMETERS:
2203 : Network - neural network;
2204 : XY - training set, see below for information on the
2205 : training set format;
2206 : NPoints - points count.
2207 :
2208 : RESULT:
2209 : classification error (number of misclassified cases)
2210 :
2211 : DATASET FORMAT:
2212 :
2213 : This function uses two different dataset formats - one for regression
2214 : networks, another one for classification networks.
2215 :
2216 : For regression networks with NIn inputs and NOut outputs following dataset
2217 : format is used:
2218 : * dataset is given by NPoints*(NIn+NOut) matrix
2219 : * each row corresponds to one example
2220 : * first NIn columns are inputs, next NOut columns are outputs
2221 :
2222 : For classification networks with NIn inputs and NClasses classes following
2223 : dataset format is used:
2224 : * dataset is given by NPoints*(NIn+1) matrix
2225 : * each row corresponds to one example
2226 : * first NIn columns are inputs, last column stores class number (from 0 to
2227 : NClasses-1).
2228 :
2229 : -- ALGLIB --
2230 : Copyright 04.11.2007 by Bochkanov Sergey
2231 : *************************************************************************/
2232 0 : ae_int_t mlpclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2233 : {
2234 : jmp_buf _break_jump;
2235 : alglib_impl::ae_state _alglib_env_state;
2236 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2237 0 : if( setjmp(_break_jump) )
2238 : {
2239 : #if !defined(AE_NO_EXCEPTIONS)
2240 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2241 : #else
2242 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2243 : return 0;
2244 : #endif
2245 : }
2246 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2247 0 : if( _xparams.flags!=0x0 )
2248 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2249 0 : alglib_impl::ae_int_t result = alglib_impl::mlpclserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2250 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2251 0 : return *(reinterpret_cast<ae_int_t*>(&result));
2252 : }
2253 :
2254 : /*************************************************************************
2255 : Relative classification error on the test set.
2256 :
2257 : ! COMMERCIAL EDITION OF ALGLIB:
2258 : !
2259 : ! Commercial Edition of ALGLIB includes following important improvements
2260 : ! of this function:
2261 : ! * high-performance native backend with same C# interface (C# version)
2262 : ! * multithreading support (C++ and C# versions)
2263 : !
2264 : ! We recommend you to read 'Working with commercial version' section of
2265 : ! ALGLIB Reference Manual in order to find out how to use performance-
2266 : ! related features provided by commercial edition of ALGLIB.
2267 :
2268 : INPUT PARAMETERS:
2269 : Network - neural network;
2270 : XY - training set, see below for information on the
2271 : training set format;
2272 : NPoints - points count.
2273 :
2274 : RESULT:
2275 : Percent of incorrectly classified cases. Works both for classifier
2276 : networks and general purpose networks used as classifiers.
2277 :
2278 : DATASET FORMAT:
2279 :
2280 : This function uses two different dataset formats - one for regression
2281 : networks, another one for classification networks.
2282 :
2283 : For regression networks with NIn inputs and NOut outputs following dataset
2284 : format is used:
2285 : * dataset is given by NPoints*(NIn+NOut) matrix
2286 : * each row corresponds to one example
2287 : * first NIn columns are inputs, next NOut columns are outputs
2288 :
2289 : For classification networks with NIn inputs and NClasses classes following
2290 : dataset format is used:
2291 : * dataset is given by NPoints*(NIn+1) matrix
2292 : * each row corresponds to one example
2293 : * first NIn columns are inputs, last column stores class number (from 0 to
2294 : NClasses-1).
2295 :
2296 : -- ALGLIB --
2297 : Copyright 25.12.2008 by Bochkanov Sergey
2298 : *************************************************************************/
2299 0 : double mlprelclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2300 : {
2301 : jmp_buf _break_jump;
2302 : alglib_impl::ae_state _alglib_env_state;
2303 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2304 0 : if( setjmp(_break_jump) )
2305 : {
2306 : #if !defined(AE_NO_EXCEPTIONS)
2307 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2308 : #else
2309 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2310 : return 0;
2311 : #endif
2312 : }
2313 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2314 0 : if( _xparams.flags!=0x0 )
2315 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2316 0 : double result = alglib_impl::mlprelclserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2317 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2318 0 : return *(reinterpret_cast<double*>(&result));
2319 : }
2320 :
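/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): classification
error metrics on a classification dataset in the format documented above
(NIn input columns plus one class-number column). A minimal sketch,
assuming the mlpcreatec1() classifier constructor from the same mlpbase
unit.
*************************************************************************/
#if 0
static void example_mlpclserrors()
{
    multilayerperceptron net;
    mlpcreatec1(2, 5, 3, net);          // classifier: NIn=2, NClasses=3
    // 3 rows of [in0, in1, class], classes in 0..NClasses-1
    real_2d_array xy = "[[0.0,0.1,0],[0.9,0.8,1],[0.5,0.5,2]]";
    printf("misclassified: %d\n", (int)mlpclserror(net, xy, 3));
    printf("relative:      %.4f\n", mlprelclserror(net, xy, 3));
}
#endif
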
2321 : /*************************************************************************
2322 : Relative classification error on the test set given by sparse matrix.
2323 :
2324 : ! COMMERCIAL EDITION OF ALGLIB:
2325 : !
2326 : ! Commercial Edition of ALGLIB includes following important improvements
2327 : ! of this function:
2328 : ! * high-performance native backend with same C# interface (C# version)
2329 : ! * multithreading support (C++ and C# versions)
2330 : !
2331 : ! We recommend you to read 'Working with commercial version' section of
2332 : ! ALGLIB Reference Manual in order to find out how to use performance-
2333 : ! related features provided by commercial edition of ALGLIB.
2334 :
2335 : INPUT PARAMETERS:
2336 : Network - neural network;
2337 : XY - training set, see below for information on the
2338 : training set format. Sparse matrix must use CRS format
2339 : for storage.
2340 : NPoints - points count, >=0.
2341 :
2342 : RESULT:
2343 : Percent of incorrectly classified cases. Works both for classifier
2344 : networks and general purpose networks used as classifiers.
2345 :
2346 : DATASET FORMAT:
2347 :
2348 : This function uses two different dataset formats - one for regression
2349 : networks, another one for classification networks.
2350 :
2351 : For regression networks with NIn inputs and NOut outputs following dataset
2352 : format is used:
2353 : * dataset is given by NPoints*(NIn+NOut) matrix
2354 : * each row corresponds to one example
2355 : * first NIn columns are inputs, next NOut columns are outputs
2356 :
2357 : For classification networks with NIn inputs and NClasses classes following
2358 : dataset format is used:
2359 : * dataset is given by NPoints*(NIn+1) matrix
2360 : * each row corresponds to one example
2361 : * first NIn columns are inputs, last column stores class number (from 0 to
2362 : NClasses-1).
2363 :
2364 : -- ALGLIB --
2365 : Copyright 09.08.2012 by Bochkanov Sergey
2366 : *************************************************************************/
2367 0 : double mlprelclserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2368 : {
2369 : jmp_buf _break_jump;
2370 : alglib_impl::ae_state _alglib_env_state;
2371 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2372 0 : if( setjmp(_break_jump) )
2373 : {
2374 : #if !defined(AE_NO_EXCEPTIONS)
2375 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2376 : #else
2377 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2378 : return 0;
2379 : #endif
2380 : }
2381 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2382 0 : if( _xparams.flags!=0x0 )
2383 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2384 0 : double result = alglib_impl::mlprelclserrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2385 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2386 0 : return *(reinterpret_cast<double*>(&result));
2387 : }
2388 :
2389 : /*************************************************************************
2390 : Average cross-entropy (in bits per element) on the test set.
2391 :
2392 : ! COMMERCIAL EDITION OF ALGLIB:
2393 : !
2394 : ! Commercial Edition of ALGLIB includes following important improvements
2395 : ! of this function:
2396 : ! * high-performance native backend with same C# interface (C# version)
2397 : ! * multithreading support (C++ and C# versions)
2398 : !
2399 : ! We recommend you to read 'Working with commercial version' section of
2400 : ! ALGLIB Reference Manual in order to find out how to use performance-
2401 : ! related features provided by commercial edition of ALGLIB.
2402 :
2403 : INPUT PARAMETERS:
2404 : Network - neural network;
2405 : XY - training set, see below for information on the
2406 : training set format;
2407 : NPoints - points count.
2408 :
2409 : RESULT:
2410 : CrossEntropy/(NPoints*LN(2)).
2411 : Zero if network solves regression task.
2412 :
2413 : DATASET FORMAT:
2414 :
2415 : This function uses two different dataset formats - one for regression
2416 : networks, another one for classification networks.
2417 :
2418 : For regression networks with NIn inputs and NOut outputs following dataset
2419 : format is used:
2420 : * dataset is given by NPoints*(NIn+NOut) matrix
2421 : * each row corresponds to one example
2422 : * first NIn columns are inputs, next NOut columns are outputs
2423 :
2424 : For classification networks with NIn inputs and NClasses classes following
2425 : dataset format is used:
2426 : * dataset is given by NPoints*(NIn+1) matrix
2427 : * each row corresponds to one example
2428 : * first NIn columns are inputs, last column stores class number (from 0 to
2429 : NClasses-1).
2430 :
2431 : -- ALGLIB --
2432 : Copyright 08.01.2009 by Bochkanov Sergey
2433 : *************************************************************************/
2434 0 : double mlpavgce(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2435 : {
2436 : jmp_buf _break_jump;
2437 : alglib_impl::ae_state _alglib_env_state;
2438 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2439 0 : if( setjmp(_break_jump) )
2440 : {
2441 : #if !defined(AE_NO_EXCEPTIONS)
2442 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2443 : #else
2444 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2445 : return 0;
2446 : #endif
2447 : }
2448 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2449 0 : if( _xparams.flags!=0x0 )
2450 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2451 0 : double result = alglib_impl::mlpavgce(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2452 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2453 0 : return *(reinterpret_cast<double*>(&result));
2454 : }
2455 :
2456 : /*************************************************************************
2457 : Average cross-entropy (in bits per element) on the test set given by
2458 : sparse matrix.
2459 :
2460 : ! COMMERCIAL EDITION OF ALGLIB:
2461 : !
2462 : ! Commercial Edition of ALGLIB includes following important improvements
2463 : ! of this function:
2464 : ! * high-performance native backend with same C# interface (C# version)
2465 : ! * multithreading support (C++ and C# versions)
2466 : !
2467 : ! We recommend you to read 'Working with commercial version' section of
2468 : ! ALGLIB Reference Manual in order to find out how to use performance-
2469 : ! related features provided by commercial edition of ALGLIB.
2470 :
2471 : INPUT PARAMETERS:
2472 : Network - neural network;
2473 : XY - training set, see below for information on the
2474 : training set format. This function checks correctness
2475 : of the dataset (no NANs/INFs, class numbers are
2476 : correct) and throws exception when incorrect dataset
2477 : is passed. Sparse matrix must use CRS format for
2478 : storage.
2479 : NPoints - points count, >=0.
2480 :
2481 : RESULT:
2482 : CrossEntropy/(NPoints*LN(2)).
2483 : Zero if network solves regression task.
2484 :
2485 : DATASET FORMAT:
2486 :
2487 : This function uses two different dataset formats - one for regression
2488 : networks, another one for classification networks.
2489 :
2490 : For regression networks with NIn inputs and NOut outputs following dataset
2491 : format is used:
2492 : * dataset is given by NPoints*(NIn+NOut) matrix
2493 : * each row corresponds to one example
2494 : * first NIn columns are inputs, next NOut columns are outputs
2495 :
2496 : For classification networks with NIn inputs and NClasses classes following
2497 : dataset format is used:
2498 : * dataset is given by NPoints*(NIn+1) matrix
2499 : * each row corresponds to one example
2500 : * first NIn columns are inputs, last column stores class number (from 0 to
2501 : NClasses-1).
2502 :
2503 : -- ALGLIB --
2504 : Copyright 09.08.2012 by Bochkanov Sergey
2505 : *************************************************************************/
2506 0 : double mlpavgcesparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2507 : {
2508 : jmp_buf _break_jump;
2509 : alglib_impl::ae_state _alglib_env_state;
2510 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2511 0 : if( setjmp(_break_jump) )
2512 : {
2513 : #if !defined(AE_NO_EXCEPTIONS)
2514 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2515 : #else
2516 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2517 : return 0;
2518 : #endif
2519 : }
2520 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2521 0 : if( _xparams.flags!=0x0 )
2522 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2523 0 : double result = alglib_impl::mlpavgcesparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2524 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2525 0 : return *(reinterpret_cast<double*>(&result));
2526 : }
2527 :
2528 : /*************************************************************************
2529 : RMS error on the test set.
2530 :
2531 : ! COMMERCIAL EDITION OF ALGLIB:
2532 : !
2533 : ! Commercial Edition of ALGLIB includes following important improvements
2534 : ! of this function:
2535 : ! * high-performance native backend with same C# interface (C# version)
2536 : ! * multithreading support (C++ and C# versions)
2537 : !
2538 : ! We recommend you to read 'Working with commercial version' section of
2539 : ! ALGLIB Reference Manual in order to find out how to use performance-
2540 : ! related features provided by commercial edition of ALGLIB.
2541 :
2542 : INPUT PARAMETERS:
2543 : Network - neural network;
2544 : XY - training set, see below for information on the
2545 : training set format;
2546 : NPoints - points count.
2547 :
2548 : RESULT:
2549 : Root mean square error. Its meaning for regression task is obvious. As for
2550 : classification task, RMS error means error when estimating posterior
2551 : probabilities.
2552 :
2553 : DATASET FORMAT:
2554 :
2555 : This function uses two different dataset formats - one for regression
2556 : networks, another one for classification networks.
2557 :
2558 : For regression networks with NIn inputs and NOut outputs following dataset
2559 : format is used:
2560 : * dataset is given by NPoints*(NIn+NOut) matrix
2561 : * each row corresponds to one example
2562 : * first NIn columns are inputs, next NOut columns are outputs
2563 :
2564 : For classification networks with NIn inputs and NClasses classes following
2565 : dataset format is used:
2566 : * dataset is given by NPoints*(NIn+1) matrix
2567 : * each row corresponds to one example
2568 : * first NIn columns are inputs, last column stores class number (from 0 to
2569 : NClasses-1).
2570 :
2571 : -- ALGLIB --
2572 : Copyright 04.11.2007 by Bochkanov Sergey
2573 : *************************************************************************/
2574 0 : double mlprmserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2575 : {
2576 : jmp_buf _break_jump;
2577 : alglib_impl::ae_state _alglib_env_state;
2578 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2579 0 : if( setjmp(_break_jump) )
2580 : {
2581 : #if !defined(AE_NO_EXCEPTIONS)
2582 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2583 : #else
2584 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2585 : return 0;
2586 : #endif
2587 : }
2588 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2589 0 : if( _xparams.flags!=0x0 )
2590 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2591 0 : double result = alglib_impl::mlprmserror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2592 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2593 0 : return *(reinterpret_cast<double*>(&result));
2594 : }
2595 :
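/*************************************************************************
EDITORIAL EXAMPLE (not part of the generated wrappers): comparing the
regression error metrics of this unit on one dataset. A minimal sketch,
assuming mlpcreate1() and the string constructor of real_2d_array;
mlpavgerror() is the average-absolute-error counterpart defined below.
*************************************************************************/
#if 0
static void example_mlperrormetrics()
{
    multilayerperceptron net;
    mlpcreate1(1, 3, 1, net);           // NIn=1, NOut=1
    real_2d_array xy = "[[0.0,0.0],[0.5,0.25],[1.0,1.0]]"; // rows of [x, target]
    printf("sse/2 = %.6f\n", mlperror(net, xy, 3));
    printf("rms   = %.6f\n", mlprmserror(net, xy, 3));
    printf("avg   = %.6f\n", mlpavgerror(net, xy, 3));
}
#endif
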
2596 : /*************************************************************************
2597 : RMS error on the test set given by sparse matrix.
2598 :
2599 : ! COMMERCIAL EDITION OF ALGLIB:
2600 : !
2601 : ! Commercial Edition of ALGLIB includes following important improvements
2602 : ! of this function:
2603 : ! * high-performance native backend with same C# interface (C# version)
2604 : ! * multithreading support (C++ and C# versions)
2605 : !
2606 : ! We recommend you to read 'Working with commercial version' section of
2607 : ! ALGLIB Reference Manual in order to find out how to use performance-
2608 : ! related features provided by commercial edition of ALGLIB.
2609 :
2610 : INPUT PARAMETERS:
2611 : Network - neural network;
2612 : XY - training set, see below for information on the
2613 : training set format. This function checks correctness
2614 : of the dataset (no NANs/INFs, class numbers are
2615 : correct) and throws exception when incorrect dataset
2616 : is passed. Sparse matrix must use CRS format for
2617 : storage.
2618 : NPoints - points count, >=0.
2619 :
2620 : RESULT:
2621 : Root mean square error. Its meaning for regression task is obvious. As for
2622 : classification task, RMS error means error when estimating posterior
2623 : probabilities.
2624 :
2625 : DATASET FORMAT:
2626 :
2627 : This function uses two different dataset formats - one for regression
2628 : networks, another one for classification networks.
2629 :
2630 : For regression networks with NIn inputs and NOut outputs following dataset
2631 : format is used:
2632 : * dataset is given by NPoints*(NIn+NOut) matrix
2633 : * each row corresponds to one example
2634 : * first NIn columns are inputs, next NOut columns are outputs
2635 :
2636 : For classification networks with NIn inputs and NClasses classes following
2637 : dataset format is used:
2638 : * dataset is given by NPoints*(NIn+1) matrix
2639 : * each row corresponds to one example
2640 : * first NIn columns are inputs, last column stores class number (from 0 to
2641 : NClasses-1).
2642 :
2643 : -- ALGLIB --
2644 : Copyright 09.08.2012 by Bochkanov Sergey
2645 : *************************************************************************/
2646 0 : double mlprmserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2647 : {
2648 : jmp_buf _break_jump;
2649 : alglib_impl::ae_state _alglib_env_state;
2650 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2651 0 : if( setjmp(_break_jump) )
2652 : {
2653 : #if !defined(AE_NO_EXCEPTIONS)
2654 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2655 : #else
2656 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2657 : return 0;
2658 : #endif
2659 : }
2660 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2661 0 : if( _xparams.flags!=0x0 )
2662 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2663 0 : double result = alglib_impl::mlprmserrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2664 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2665 0 : return *(reinterpret_cast<double*>(&result));
2666 : }
2667 :
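/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
calling mlprmserrorsparse() on a sparse dataset. Values, topology and the
helper name are arbitrary; note the mandatory sparseconverttocrs() call,
since this function requires CRS storage.
*/
static void example_mlprmserrorsparse()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);                 // regression network: NIn=2, NOut=1
    alglib::sparsematrix xy;
    alglib::sparsecreate(2, 3, xy);                   // 2 samples, NIn+NOut=3 columns
    alglib::sparseset(xy, 0, 0, 1.0);                 // sample 0: inputs (1,0), output 0.5
    alglib::sparseset(xy, 0, 2, 0.5);
    alglib::sparseset(xy, 1, 1, 1.0);                 // sample 1: inputs (0,1), output 0.7
    alglib::sparseset(xy, 1, 2, 0.7);
    alglib::sparseconverttocrs(xy);                   // CRS storage is required
    double rms = alglib::mlprmserrorsparse(net, xy, 2);
    (void)rms;
}
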
2668 : /*************************************************************************
2669 : Average absolute error on the test set.
2670 :
2671 : ! COMMERCIAL EDITION OF ALGLIB:
2672 : !
2673 : ! Commercial Edition of ALGLIB includes following important improvements
2674 : ! of this function:
2675 : ! * high-performance native backend with same C# interface (C# version)
2676 : ! * multithreading support (C++ and C# versions)
2677 : !
2678 : ! We recommend you to read 'Working with commercial version' section of
2679 : ! ALGLIB Reference Manual in order to find out how to use performance-
2680 : ! related features provided by commercial edition of ALGLIB.
2681 :
2682 : INPUT PARAMETERS:
2683 : Network - neural network;
2684 : XY - training set, see below for information on the
2685 : training set format;
2686 : NPoints - points count.
2687 :
2688 : RESULT:
2689 : For a regression task its meaning is obvious. For a classification task it
2690 : is the average error of the estimated posterior probabilities.
2691 :
2692 : DATASET FORMAT:
2693 :
2694 : This function uses two different dataset formats - one for regression
2695 : networks, another one for classification networks.
2696 :
2697 : For regression networks with NIn inputs and NOut outputs the following
2698 : dataset format is used:
2699 : * dataset is given by NPoints*(NIn+NOut) matrix
2700 : * each row corresponds to one example
2701 : * first NIn columns are inputs, next NOut columns are outputs
2702 :
2703 : For classification networks with NIn inputs and NClasses classes the
2704 : following dataset format is used:
2705 : * dataset is given by NPoints*(NIn+1) matrix
2706 : * each row corresponds to one example
2707 : * first NIn columns are inputs, last column stores class number (from 0 to
2708 : NClasses-1).
2709 :
2710 : -- ALGLIB --
2711 : Copyright 11.03.2008 by Bochkanov Sergey
2712 : *************************************************************************/
2713 0 : double mlpavgerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2714 : {
2715 : jmp_buf _break_jump;
2716 : alglib_impl::ae_state _alglib_env_state;
2717 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2718 0 : if( setjmp(_break_jump) )
2719 : {
2720 : #if !defined(AE_NO_EXCEPTIONS)
2721 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2722 : #else
2723 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2724 : return 0;
2725 : #endif
2726 : }
2727 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2728 0 : if( _xparams.flags!=0x0 )
2729 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2730 0 : double result = alglib_impl::mlpavgerror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2731 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2732 0 : return *(reinterpret_cast<double*>(&result));
2733 : }
2734 :
2735 : /*************************************************************************
2736 : Average absolute error on the test set given by sparse matrix.
2737 :
2738 : ! COMMERCIAL EDITION OF ALGLIB:
2739 : !
2740 : ! Commercial Edition of ALGLIB includes following important improvements
2741 : ! of this function:
2742 : ! * high-performance native backend with same C# interface (C# version)
2743 : ! * multithreading support (C++ and C# versions)
2744 : !
2745 : ! We recommend you to read 'Working with commercial version' section of
2746 : ! ALGLIB Reference Manual in order to find out how to use performance-
2747 : ! related features provided by commercial edition of ALGLIB.
2748 :
2749 : INPUT PARAMETERS:
2750 : Network - neural network;
2751 : XY - training set, see below for information on the
2752 : training set format. This function checks correctness
2753 : of the dataset (no NANs/INFs, class numbers are
2754 : correct) and throws exception when incorrect dataset
2755 : is passed. Sparse matrix must use CRS format for
2756 : storage.
2757 : NPoints - points count, >=0.
2758 :
2759 : RESULT:
2760 : For a regression task its meaning is obvious. For a classification task it
2761 : is the average error of the estimated posterior probabilities.
2762 :
2763 : DATASET FORMAT:
2764 :
2765 : This function uses two different dataset formats - one for regression
2766 : networks, another one for classification networks.
2767 :
2768 : For regression networks with NIn inputs and NOut outputs the following
2769 : dataset format is used:
2770 : * dataset is given by NPoints*(NIn+NOut) matrix
2771 : * each row corresponds to one example
2772 : * first NIn columns are inputs, next NOut columns are outputs
2773 :
2774 : For classification networks with NIn inputs and NClasses classes the
2775 : following dataset format is used:
2776 : * dataset is given by NPoints*(NIn+1) matrix
2777 : * each row corresponds to one example
2778 : * first NIn columns are inputs, last column stores class number (from 0 to
2779 : NClasses-1).
2780 :
2781 : -- ALGLIB --
2782 : Copyright 09.08.2012 by Bochkanov Sergey
2783 : *************************************************************************/
2784 0 : double mlpavgerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2785 : {
2786 : jmp_buf _break_jump;
2787 : alglib_impl::ae_state _alglib_env_state;
2788 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2789 0 : if( setjmp(_break_jump) )
2790 : {
2791 : #if !defined(AE_NO_EXCEPTIONS)
2792 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2793 : #else
2794 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2795 : return 0;
2796 : #endif
2797 : }
2798 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2799 0 : if( _xparams.flags!=0x0 )
2800 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2801 0 : double result = alglib_impl::mlpavgerrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2802 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2803 0 : return *(reinterpret_cast<double*>(&result));
2804 : }
2805 :
2806 : /*************************************************************************
2807 : Average relative error on the test set.
2808 :
2809 : ! COMMERCIAL EDITION OF ALGLIB:
2810 : !
2811 : ! Commercial Edition of ALGLIB includes following important improvements
2812 : ! of this function:
2813 : ! * high-performance native backend with same C# interface (C# version)
2814 : ! * multithreading support (C++ and C# versions)
2815 : !
2816 : ! We recommend you to read 'Working with commercial version' section of
2817 : ! ALGLIB Reference Manual in order to find out how to use performance-
2818 : ! related features provided by commercial edition of ALGLIB.
2819 :
2820 : INPUT PARAMETERS:
2821 : Network - neural network;
2822 : XY - training set, see below for information on the
2823 : training set format;
2824 : NPoints - points count.
2825 :
2826 : RESULT:
2827 : For a regression task its meaning is obvious. For a classification task it
2828 : is the average relative error of the estimated posterior probability of
2829 : belonging to the correct class.
2830 :
2831 : DATASET FORMAT:
2832 :
2833 : This function uses two different dataset formats - one for regression
2834 : networks, another one for classification networks.
2835 :
2836 : For regression networks with NIn inputs and NOut outputs the following
2837 : dataset format is used:
2838 : * dataset is given by NPoints*(NIn+NOut) matrix
2839 : * each row corresponds to one example
2840 : * first NIn columns are inputs, next NOut columns are outputs
2841 :
2842 : For classification networks with NIn inputs and NClasses classes the
2843 : following dataset format is used:
2844 : * dataset is given by NPoints*(NIn+1) matrix
2845 : * each row corresponds to one example
2846 : * first NIn columns are inputs, last column stores class number (from 0 to
2847 : NClasses-1).
2848 :
2849 : -- ALGLIB --
2850 : Copyright 11.03.2008 by Bochkanov Sergey
2851 : *************************************************************************/
2852 0 : double mlpavgrelerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
2853 : {
2854 : jmp_buf _break_jump;
2855 : alglib_impl::ae_state _alglib_env_state;
2856 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2857 0 : if( setjmp(_break_jump) )
2858 : {
2859 : #if !defined(AE_NO_EXCEPTIONS)
2860 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2861 : #else
2862 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2863 : return 0;
2864 : #endif
2865 : }
2866 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2867 0 : if( _xparams.flags!=0x0 )
2868 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2869 0 : double result = alglib_impl::mlpavgrelerror(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2870 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2871 0 : return *(reinterpret_cast<double*>(&result));
2872 : }
2873 :
2874 : /*************************************************************************
2875 : Average relative error on the test set given by sparse matrix.
2876 :
2877 : ! COMMERCIAL EDITION OF ALGLIB:
2878 : !
2879 : ! Commercial Edition of ALGLIB includes following important improvements
2880 : ! of this function:
2881 : ! * high-performance native backend with same C# interface (C# version)
2882 : ! * multithreading support (C++ and C# versions)
2883 : !
2884 : ! We recommend you to read 'Working with commercial version' section of
2885 : ! ALGLIB Reference Manual in order to find out how to use performance-
2886 : ! related features provided by commercial edition of ALGLIB.
2887 :
2888 : INPUT PARAMETERS:
2889 : Network - neural network;
2890 : XY - training set, see below for information on the
2891 : training set format. This function checks correctness
2892 : of the dataset (no NANs/INFs, class numbers are
2893 : correct) and throws exception when incorrect dataset
2894 : is passed. Sparse matrix must use CRS format for
2895 : storage.
2896 : NPoints - points count, >=0.
2897 :
2898 : RESULT:
2899 : For a regression task its meaning is obvious. For a classification task it
2900 : is the average relative error of the estimated posterior probability of
2901 : belonging to the correct class.
2902 :
2903 : DATASET FORMAT:
2904 :
2905 : This function uses two different dataset formats - one for regression
2906 : networks, another one for classification networks.
2907 :
2908 : For regression networks with NIn inputs and NOut outputs the following
2909 : dataset format is used:
2910 : * dataset is given by NPoints*(NIn+NOut) matrix
2911 : * each row corresponds to one example
2912 : * first NIn columns are inputs, next NOut columns are outputs
2913 :
2914 : For classification networks with NIn inputs and NClasses classes the
2915 : following dataset format is used:
2916 : * dataset is given by NPoints*(NIn+1) matrix
2917 : * each row corresponds to one example
2918 : * first NIn columns are inputs, last column stores class number (from 0 to
2919 : NClasses-1).
2920 :
2921 : -- ALGLIB --
2922 : Copyright 09.08.2012 by Bochkanov Sergey
2923 : *************************************************************************/
2924 0 : double mlpavgrelerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
2925 : {
2926 : jmp_buf _break_jump;
2927 : alglib_impl::ae_state _alglib_env_state;
2928 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2929 0 : if( setjmp(_break_jump) )
2930 : {
2931 : #if !defined(AE_NO_EXCEPTIONS)
2932 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2933 : #else
2934 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2935 : return 0;
2936 : #endif
2937 : }
2938 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2939 0 : if( _xparams.flags!=0x0 )
2940 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2941 0 : double result = alglib_impl::mlpavgrelerrorsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
2942 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2943 0 : return *(reinterpret_cast<double*>(&result));
2944 : }
2945 :
2946 : /*************************************************************************
2947 : Gradient calculation
2948 :
2949 : INPUT PARAMETERS:
2950 : Network - network initialized with one of the network creation funcs
2951 : X - input vector, length of array must be at least NIn
2952 : DesiredY- desired outputs, length of array must be at least NOut
2953 : Grad - possibly preallocated array. If size of array is smaller
2954 : than WCount, it will be reallocated. It is recommended to
2955 : reuse previously allocated array to reduce allocation
2956 : overhead.
2957 :
2958 : OUTPUT PARAMETERS:
2959 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
2960 : Grad - gradient of E with respect to weights of network, array[WCount]
2961 :
2962 : -- ALGLIB --
2963 : Copyright 04.11.2007 by Bochkanov Sergey
2964 : *************************************************************************/
2965 0 : void mlpgrad(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams)
2966 : {
2967 : jmp_buf _break_jump;
2968 : alglib_impl::ae_state _alglib_env_state;
2969 0 : alglib_impl::ae_state_init(&_alglib_env_state);
2970 0 : if( setjmp(_break_jump) )
2971 : {
2972 : #if !defined(AE_NO_EXCEPTIONS)
2973 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
2974 : #else
2975 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
2976 : return;
2977 : #endif
2978 : }
2979 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
2980 0 : if( _xparams.flags!=0x0 )
2981 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
2982 0 : alglib_impl::mlpgrad(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(desiredy.c_ptr()), &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
2983 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
2984 0 : return;
2985 : }
2986 :
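/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
a single gradient evaluation with mlpgrad(). Grad is resized to WCount
automatically; reusing the same array across calls avoids reallocation, as
the comment above notes. Topology, data and the helper name are arbitrary.
*/
static void example_mlpgrad()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_1d_array x("[0.1, 0.2]");            // NIn=2 inputs
    alglib::real_1d_array desiredy("[1.0]");          // NOut=1 desired output
    alglib::real_1d_array grad;                       // reallocated to WCount if too small
    double e;
    alglib::mlpgrad(net, x, desiredy, e, grad);       // e = SUM(sqr(y[i]-desiredy[i])/2,i)
}
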
2987 : /*************************************************************************
2988 : Gradient calculation (natural error function is used)
2989 :
2990 : INPUT PARAMETERS:
2991 : Network - network initialized with one of the network creation funcs
2992 : X - input vector, length of array must be at least NIn
2993 : DesiredY- desired outputs, length of array must be at least NOut
2994 : Grad - possibly preallocated array. If size of array is smaller
2995 : than WCount, it will be reallocated. It is recommended to
2996 : reuse previously allocated array to reduce allocation
2997 : overhead.
2998 :
2999 : OUTPUT PARAMETERS:
3000 : E - error function, sum-of-squares for regression networks,
3001 : cross-entropy for classification networks.
3002 : Grad - gradient of E with respect to weights of network, array[WCount]
3003 :
3004 : -- ALGLIB --
3005 : Copyright 04.11.2007 by Bochkanov Sergey
3006 : *************************************************************************/
3007 0 : void mlpgradn(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams)
3008 : {
3009 : jmp_buf _break_jump;
3010 : alglib_impl::ae_state _alglib_env_state;
3011 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3012 0 : if( setjmp(_break_jump) )
3013 : {
3014 : #if !defined(AE_NO_EXCEPTIONS)
3015 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3016 : #else
3017 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3018 : return;
3019 : #endif
3020 : }
3021 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3022 0 : if( _xparams.flags!=0x0 )
3023 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3024 0 : alglib_impl::mlpgradn(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(desiredy.c_ptr()), &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3025 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3026 0 : return;
3027 : }
3028 :
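/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
mlpgradn() on a classifier network, where the natural error function is
cross-entropy. For a classifier created with mlpcreatec1(), DesiredY is the
NOut-dimensional vector of desired posterior probabilities. Topology, data
and the helper name are arbitrary.
*/
static void example_mlpgradn()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreatec1(2, 5, 3, net);                // classifier: NIn=2, 3 classes
    alglib::real_1d_array x("[0.1, 0.2]");
    alglib::real_1d_array desiredy("[1.0, 0.0, 0.0]"); // sample belongs to class 0
    alglib::real_1d_array grad;
    double e;
    alglib::mlpgradn(net, x, desiredy, e, grad);      // e is cross-entropy here
}
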
3029 : /*************************************************************************
3030 : Batch gradient calculation for a set of inputs/outputs
3031 :
3032 : ! COMMERCIAL EDITION OF ALGLIB:
3033 : !
3034 : ! Commercial Edition of ALGLIB includes following important improvements
3035 : ! of this function:
3036 : ! * high-performance native backend with same C# interface (C# version)
3037 : ! * multithreading support (C++ and C# versions)
3038 : !
3039 : ! We recommend you to read 'Working with commercial version' section of
3040 : ! ALGLIB Reference Manual in order to find out how to use performance-
3041 : ! related features provided by commercial edition of ALGLIB.
3042 :
3043 : INPUT PARAMETERS:
3044 : Network - network initialized with one of the network creation funcs
3045 : XY - original dataset in dense format; one sample = one row:
3046 : * first NIn columns contain inputs,
3047 : * for regression problem, next NOut columns store
3048 : desired outputs.
3049 : * for classification problem, next column (just one!)
3050 : stores class number.
3051 : SSize - number of elements in XY
3052 : Grad - possibly preallocated array. If size of array is smaller
3053 : than WCount, it will be reallocated. It is recommended to
3054 : reuse previously allocated array to reduce allocation
3055 : overhead.
3056 :
3057 : OUTPUT PARAMETERS:
3058 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
3059 : Grad - gradient of E with respect to weights of network, array[WCount]
3060 :
3061 : -- ALGLIB --
3062 : Copyright 04.11.2007 by Bochkanov Sergey
3063 : *************************************************************************/
3064 0 : void mlpgradbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
3065 : {
3066 : jmp_buf _break_jump;
3067 : alglib_impl::ae_state _alglib_env_state;
3068 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3069 0 : if( setjmp(_break_jump) )
3070 : {
3071 : #if !defined(AE_NO_EXCEPTIONS)
3072 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3073 : #else
3074 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3075 : return;
3076 : #endif
3077 : }
3078 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3079 0 : if( _xparams.flags!=0x0 )
3080 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3081 0 : alglib_impl::mlpgradbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3082 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3083 0 : return;
3084 : }
3085 :
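/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
batch gradient over a dense dataset with mlpgradbatch(). For a regression
network, one row = NIn inputs followed by NOut desired outputs. Topology,
data and the helper name are arbitrary.
*/
static void example_mlpgradbatch()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_2d_array xy("[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]");
    alglib::real_1d_array grad;
    double e;
    alglib::mlpgradbatch(net, xy, 4, e, grad);        // SSize=4 samples
}
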
3086 : /*************************************************************************
3087 : Batch gradient calculation for a set of inputs/outputs given by sparse
3088 : matrices
3089 :
3090 : ! COMMERCIAL EDITION OF ALGLIB:
3091 : !
3092 : ! Commercial Edition of ALGLIB includes following important improvements
3093 : ! of this function:
3094 : ! * high-performance native backend with same C# interface (C# version)
3095 : ! * multithreading support (C++ and C# versions)
3096 : !
3097 : ! We recommend you to read 'Working with commercial version' section of
3098 : ! ALGLIB Reference Manual in order to find out how to use performance-
3099 : ! related features provided by commercial edition of ALGLIB.
3100 :
3101 : INPUT PARAMETERS:
3102 : Network - network initialized with one of the network creation funcs
3103 : XY - original dataset in sparse format; one sample = one row:
3104 : * MATRIX MUST BE STORED IN CRS FORMAT
3105 : * first NIn columns contain inputs.
3106 : * for regression problem, next NOut columns store
3107 : desired outputs.
3108 : * for classification problem, next column (just one!)
3109 : stores class number.
3110 : SSize - number of elements in XY
3111 : Grad - possibly preallocated array. If size of array is smaller
3112 : than WCount, it will be reallocated. It is recommended to
3113 : reuse previously allocated array to reduce allocation
3114 : overhead.
3115 :
3116 : OUTPUT PARAMETERS:
3117 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
3118 : Grad - gradient of E with respect to weights of network, array[WCount]
3119 :
3120 : -- ALGLIB --
3121 : Copyright 26.07.2012 by Bochkanov Sergey
3122 : *************************************************************************/
3123 0 : void mlpgradbatchsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
3124 : {
3125 : jmp_buf _break_jump;
3126 : alglib_impl::ae_state _alglib_env_state;
3127 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3128 0 : if( setjmp(_break_jump) )
3129 : {
3130 : #if !defined(AE_NO_EXCEPTIONS)
3131 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3132 : #else
3133 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3134 : return;
3135 : #endif
3136 : }
3137 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3138 0 : if( _xparams.flags!=0x0 )
3139 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3140 0 : alglib_impl::mlpgradbatchsparse(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3141 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3142 0 : return;
3143 : }
3144 :
3145 : /*************************************************************************
3146 : Batch gradient calculation for a subset of dataset
3147 :
3148 : ! COMMERCIAL EDITION OF ALGLIB:
3149 : !
3150 : ! Commercial Edition of ALGLIB includes following important improvements
3151 : ! of this function:
3152 : ! * high-performance native backend with same C# interface (C# version)
3153 : ! * multithreading support (C++ and C# versions)
3154 : !
3155 : ! We recommend you to read 'Working with commercial version' section of
3156 : ! ALGLIB Reference Manual in order to find out how to use performance-
3157 : ! related features provided by commercial edition of ALGLIB.
3158 :
3159 : INPUT PARAMETERS:
3160 : Network - network initialized with one of the network creation funcs
3161 : XY - original dataset in dense format; one sample = one row:
3162 : * first NIn columns contain inputs,
3163 : * for regression problem, next NOut columns store
3164 : desired outputs.
3165 : * for classification problem, next column (just one!)
3166 : stores class number.
3167 : SetSize - real size of XY, SetSize>=0;
3168 : Idx - subset of SubsetSize elements, array[SubsetSize]:
3169 : * Idx[I] stores row index in the original dataset which is
3170 : given by XY. Gradient is calculated with respect to rows
3171 : whose indexes are stored in Idx[].
3172 : * Idx[] must store correct indexes; this function throws
3173 :                an exception when an incorrect index (less than 0 or
3174 :                larger than or equal to rows(XY)) is given
3175 : * Idx[] may store indexes in any order and even with
3176 : repetitions.
3177 : SubsetSize- number of elements in Idx[] array:
3178 : * positive value means that subset given by Idx[] is processed
3179 : * zero value results in zero gradient
3180 : * negative value means that full dataset is processed
3181 : Grad - possibly preallocated array. If size of array is smaller
3182 : than WCount, it will be reallocated. It is recommended to
3183 : reuse previously allocated array to reduce allocation
3184 : overhead.
3185 :
3186 : OUTPUT PARAMETERS:
3187 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
3188 : Grad - gradient of E with respect to weights of network,
3189 : array[WCount]
3190 :
3191 : -- ALGLIB --
3192 : Copyright 26.07.2012 by Bochkanov Sergey
3193 : *************************************************************************/
3194 0 : void mlpgradbatchsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams)
3195 : {
3196 : jmp_buf _break_jump;
3197 : alglib_impl::ae_state _alglib_env_state;
3198 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3199 0 : if( setjmp(_break_jump) )
3200 : {
3201 : #if !defined(AE_NO_EXCEPTIONS)
3202 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3203 : #else
3204 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3205 : return;
3206 : #endif
3207 : }
3208 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3209 0 : if( _xparams.flags!=0x0 )
3210 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3211 0 : alglib_impl::mlpgradbatchsubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(idx.c_ptr()), subsetsize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3212 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3213 0 : return;
3214 : }
3215 :
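/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
gradient over a subset of rows with mlpgradbatchsubset(). Idx[] may repeat
indices and need not be ordered; SubsetSize<0 would process the full dataset
instead. Topology, data and the helper name are arbitrary.
*/
static void example_mlpgradbatchsubset()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_2d_array xy("[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]");
    alglib::integer_1d_array idx("[0, 2, 2]");        // rows 0 and 2, with row 2 counted twice
    alglib::real_1d_array grad;
    double e;
    alglib::mlpgradbatchsubset(net, xy, 4, idx, 3, e, grad); // SetSize=4, SubsetSize=3
}
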
3216 : /*************************************************************************
3217 : Batch gradient calculation for a set of inputs/outputs for a subset of the
3218 : dataset given by a set of indexes.
3219 :
3220 : ! COMMERCIAL EDITION OF ALGLIB:
3221 : !
3222 : ! Commercial Edition of ALGLIB includes following important improvements
3223 : ! of this function:
3224 : ! * high-performance native backend with same C# interface (C# version)
3225 : ! * multithreading support (C++ and C# versions)
3226 : !
3227 : ! We recommend you to read 'Working with commercial version' section of
3228 : ! ALGLIB Reference Manual in order to find out how to use performance-
3229 : ! related features provided by commercial edition of ALGLIB.
3230 :
3231 : INPUT PARAMETERS:
3232 : Network - network initialized with one of the network creation funcs
3233 : XY - original dataset in sparse format; one sample = one row:
3234 : * MATRIX MUST BE STORED IN CRS FORMAT
3235 : * first NIn columns contain inputs,
3236 : * for regression problem, next NOut columns store
3237 : desired outputs.
3238 : * for classification problem, next column (just one!)
3239 : stores class number.
3240 : SetSize - real size of XY, SetSize>=0;
3241 : Idx - subset of SubsetSize elements, array[SubsetSize]:
3242 : * Idx[I] stores row index in the original dataset which is
3243 : given by XY. Gradient is calculated with respect to rows
3244 : whose indexes are stored in Idx[].
3245 : * Idx[] must store correct indexes; this function throws
3246 :                an exception when an incorrect index (less than 0 or
3247 :                larger than or equal to rows(XY)) is given
3248 : * Idx[] may store indexes in any order and even with
3249 : repetitions.
3250 : SubsetSize- number of elements in Idx[] array:
3251 : * positive value means that subset given by Idx[] is processed
3252 : * zero value results in zero gradient
3253 : * negative value means that full dataset is processed
3254 : Grad - possibly preallocated array. If size of array is smaller
3255 : than WCount, it will be reallocated. It is recommended to
3256 : reuse previously allocated array to reduce allocation
3257 : overhead.
3258 :
3259 : OUTPUT PARAMETERS:
3260 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
3261 : Grad - gradient of E with respect to weights of network,
3262 : array[WCount]
3263 :
3264 : NOTE: when SubsetSize<0 is used, the full dataset is processed by calling
3265 :       the MLPGradBatchSparse function.
3266 :
3267 : -- ALGLIB --
3268 : Copyright 26.07.2012 by Bochkanov Sergey
3269 : *************************************************************************/
3270 0 : void mlpgradbatchsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams)
3271 : {
3272 : jmp_buf _break_jump;
3273 : alglib_impl::ae_state _alglib_env_state;
3274 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3275 0 : if( setjmp(_break_jump) )
3276 : {
3277 : #if !defined(AE_NO_EXCEPTIONS)
3278 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3279 : #else
3280 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3281 : return;
3282 : #endif
3283 : }
3284 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3285 0 : if( _xparams.flags!=0x0 )
3286 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3287 0 : alglib_impl::mlpgradbatchsparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(idx.c_ptr()), subsetsize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3288 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3289 0 : return;
3290 : }
3291 :
3292 : /*************************************************************************
3293 : Batch gradient calculation for a set of inputs/outputs
3294 : (natural error function is used)
3295 :
3296 : INPUT PARAMETERS:
3297 : Network - network initialized with one of the network creation funcs
3298 : XY - set of inputs/outputs; one sample = one row;
3299 : first NIn columns contain inputs,
3300 : next NOut columns - desired outputs.
3301 : SSize - number of elements in XY
3302 : Grad - possibly preallocated array. If size of array is smaller
3303 : than WCount, it will be reallocated. It is recommended to
3304 : reuse previously allocated array to reduce allocation
3305 : overhead.
3306 :
3307 : OUTPUT PARAMETERS:
3308 : E - error function, sum-of-squares for regression networks,
3309 : cross-entropy for classification networks.
3310 : Grad - gradient of E with respect to weights of network, array[WCount]
3311 :
3312 : -- ALGLIB --
3313 : Copyright 04.11.2007 by Bochkanov Sergey
3314 : *************************************************************************/
3315 0 : void mlpgradnbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams)
3316 : {
3317 : jmp_buf _break_jump;
3318 : alglib_impl::ae_state _alglib_env_state;
3319 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3320 0 : if( setjmp(_break_jump) )
3321 : {
3322 : #if !defined(AE_NO_EXCEPTIONS)
3323 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3324 : #else
3325 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3326 : return;
3327 : #endif
3328 : }
3329 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3330 0 : if( _xparams.flags!=0x0 )
3331 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3332 0 : alglib_impl::mlpgradnbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), &_alglib_env_state);
3333 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3334 0 : return;
3335 : }
3336 :
3337 : /*************************************************************************
3338 : Batch Hessian calculation (natural error function) using R-algorithm.
3339 : Internal subroutine.
3340 :
3341 : -- ALGLIB --
3342 : Copyright 26.01.2008 by Bochkanov Sergey.
3343 :
3344 : Hessian calculation based on R-algorithm described in
3345 : "Fast Exact Multiplication by the Hessian",
3346 : B. A. Pearlmutter,
3347 : Neural Computation, 1994.
3348 : *************************************************************************/
3349 0 : void mlphessiannbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams)
3350 : {
3351 : jmp_buf _break_jump;
3352 : alglib_impl::ae_state _alglib_env_state;
3353 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3354 0 : if( setjmp(_break_jump) )
3355 : {
3356 : #if !defined(AE_NO_EXCEPTIONS)
3357 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3358 : #else
3359 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3360 : return;
3361 : #endif
3362 : }
3363 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3364 0 : if( _xparams.flags!=0x0 )
3365 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3366 0 : alglib_impl::mlphessiannbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
3367 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3368 0 : return;
3369 : }
3370 :
3371 : /*************************************************************************
3372 : Batch Hessian calculation using R-algorithm.
3373 : Internal subroutine.
3374 :
3375 : -- ALGLIB --
3376 : Copyright 26.01.2008 by Bochkanov Sergey.
3377 :
3378 : Hessian calculation based on R-algorithm described in
3379 : "Fast Exact Multiplication by the Hessian",
3380 : B. A. Pearlmutter,
3381 : Neural Computation, 1994.
3382 : *************************************************************************/
3383 0 : void mlphessianbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams)
3384 : {
3385 : jmp_buf _break_jump;
3386 : alglib_impl::ae_state _alglib_env_state;
3387 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3388 0 : if( setjmp(_break_jump) )
3389 : {
3390 : #if !defined(AE_NO_EXCEPTIONS)
3391 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3392 : #else
3393 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3394 : return;
3395 : #endif
3396 : }
3397 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3398 0 : if( _xparams.flags!=0x0 )
3399 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3400 0 : alglib_impl::mlphessianbatch(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &e, const_cast<alglib_impl::ae_vector*>(grad.c_ptr()), const_cast<alglib_impl::ae_matrix*>(h.c_ptr()), &_alglib_env_state);
3401 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3402 0 : return;
3403 : }
3404 :
3405 : /*************************************************************************
3406 : Calculation of all types of errors on subset of dataset.
3407 :
3408 : ! COMMERCIAL EDITION OF ALGLIB:
3409 : !
3410 : ! Commercial Edition of ALGLIB includes following important improvements
3411 : ! of this function:
3412 : ! * high-performance native backend with same C# interface (C# version)
3413 : ! * multithreading support (C++ and C# versions)
3414 : !
3415 : ! We recommend you to read 'Working with commercial version' section of
3416 : ! ALGLIB Reference Manual in order to find out how to use performance-
3417 : ! related features provided by commercial edition of ALGLIB.
3418 :
3419 : INPUT PARAMETERS:
3420 : Network - network initialized with one of the network creation funcs
3421 : XY - original dataset; one sample = one row;
3422 : first NIn columns contain inputs,
3423 : next NOut columns - desired outputs.
3424 : SetSize - real size of XY, SetSize>=0;
3425 : Subset - subset of SubsetSize elements, array[SubsetSize];
3426 : SubsetSize- number of elements in Subset[] array:
3427 : * if SubsetSize>0, rows of XY with indices Subset[0]...
3428 : ...Subset[SubsetSize-1] are processed
3429 : * if SubsetSize=0, zeros are returned
3430 : * if SubsetSize<0, entire dataset is processed; Subset[]
3431 : array is ignored in this case.
3432 :
3433 : OUTPUT PARAMETERS:
3434 :     Rep     -   contains all types of errors.
3435 :
3436 : -- ALGLIB --
3437 : Copyright 04.09.2012 by Bochkanov Sergey
3438 : *************************************************************************/
3439 0 : void mlpallerrorssubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams)
3440 : {
3441 : jmp_buf _break_jump;
3442 : alglib_impl::ae_state _alglib_env_state;
3443 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3444 0 : if( setjmp(_break_jump) )
3445 : {
3446 : #if !defined(AE_NO_EXCEPTIONS)
3447 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3448 : #else
3449 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3450 : return;
3451 : #endif
3452 : }
3453 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3454 0 : if( _xparams.flags!=0x0 )
3455 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3456 0 : alglib_impl::mlpallerrorssubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, const_cast<alglib_impl::modelerrors*>(rep.c_ptr()), &_alglib_env_state);
3457 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3458 0 : return;
3459 : }
3460 :
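/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
gathering all error metrics at once with mlpallerrorssubset(). Passing
SubsetSize=-1 processes the entire dataset and ignores Subset[]. Topology,
data and the helper name are arbitrary.
*/
static void example_mlpallerrorssubset()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 5, 1, net);
    alglib::real_2d_array xy("[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]");
    alglib::integer_1d_array subset;                  // ignored when SubsetSize<0
    alglib::modelerrors rep;
    alglib::mlpallerrorssubset(net, xy, 4, subset, -1, rep);
    double rms = rep.rmserror;                        // also: avgerror, avgrelerror, avgce, relclserror
    (void)rms;
}
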
3461 : /*************************************************************************
3462 : Calculation of all types of errors on subset of dataset.
3463 :
3464 : ! COMMERCIAL EDITION OF ALGLIB:
3465 : !
3466 : ! Commercial Edition of ALGLIB includes following important improvements
3467 : ! of this function:
3468 : ! * high-performance native backend with same C# interface (C# version)
3469 : ! * multithreading support (C++ and C# versions)
3470 : !
3471 : ! We recommend you to read 'Working with commercial version' section of
3472 : ! ALGLIB Reference Manual in order to find out how to use performance-
3473 : ! related features provided by commercial edition of ALGLIB.
3474 :
3475 : INPUT PARAMETERS:
3476 : Network - network initialized with one of the network creation funcs
3477 : XY - original dataset given by sparse matrix;
3478 : one sample = one row;
3479 : first NIn columns contain inputs,
3480 : next NOut columns - desired outputs.
3481 : SetSize - real size of XY, SetSize>=0;
3482 : Subset - subset of SubsetSize elements, array[SubsetSize];
3483 : SubsetSize- number of elements in Subset[] array:
3484 : * if SubsetSize>0, rows of XY with indices Subset[0]...
3485 : ...Subset[SubsetSize-1] are processed
3486 : * if SubsetSize=0, zeros are returned
3487 : * if SubsetSize<0, entire dataset is processed; Subset[]
3488 : array is ignored in this case.
3489 :
3490 : OUTPUT PARAMETERS:
3491 :     Rep     -   contains all types of errors.
3492 :
3493 :
3494 : -- ALGLIB --
3495 : Copyright 04.09.2012 by Bochkanov Sergey
3496 : *************************************************************************/
3497 0 : void mlpallerrorssparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams)
3498 : {
3499 : jmp_buf _break_jump;
3500 : alglib_impl::ae_state _alglib_env_state;
3501 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3502 0 : if( setjmp(_break_jump) )
3503 : {
3504 : #if !defined(AE_NO_EXCEPTIONS)
3505 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3506 : #else
3507 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3508 : return;
3509 : #endif
3510 : }
3511 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3512 0 : if( _xparams.flags!=0x0 )
3513 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3514 0 : alglib_impl::mlpallerrorssparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, const_cast<alglib_impl::modelerrors*>(rep.c_ptr()), &_alglib_env_state);
3515 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3516 0 : return;
3517 : }
3518 :
3519 : /*************************************************************************
3520 : Error of the neural network on subset of dataset.
3521 :
3522 : ! COMMERCIAL EDITION OF ALGLIB:
3523 : !
3524 : ! Commercial Edition of ALGLIB includes following important improvements
3525 : ! of this function:
3526 : ! * high-performance native backend with same C# interface (C# version)
3527 : ! * multithreading support (C++ and C# versions)
3528 : !
3529 : ! We recommend you to read 'Working with commercial version' section of
3530 : ! ALGLIB Reference Manual in order to find out how to use performance-
3531 : ! related features provided by commercial edition of ALGLIB.
3532 :
3533 : INPUT PARAMETERS:
3534 : Network - neural network;
3535 : XY - training set, see below for information on the
3536 : training set format;
3537 : SetSize - real size of XY, SetSize>=0;
3538 : Subset - subset of SubsetSize elements, array[SubsetSize];
3539 : SubsetSize- number of elements in Subset[] array:
3540 : * if SubsetSize>0, rows of XY with indices Subset[0]...
3541 : ...Subset[SubsetSize-1] are processed
3542 : * if SubsetSize=0, zeros are returned
3543 : * if SubsetSize<0, entire dataset is processed; Subset[]
3544 : array is ignored in this case.
3545 :
3546 : RESULT:
3547 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
3548 :
3549 : DATASET FORMAT:
3550 :
3551 : This function uses two different dataset formats - one for regression
3552 : networks, another one for classification networks.
3553 :
3554 : For regression networks with NIn inputs and NOut outputs the following
3555 : dataset format is used:
3556 : * dataset is given by NPoints*(NIn+NOut) matrix
3557 : * each row corresponds to one example
3558 : * first NIn columns are inputs, next NOut columns are outputs
3559 :
3560 : For classification networks with NIn inputs and NClasses classes the
3561 : following dataset format is used:
3562 : * dataset is given by NPoints*(NIn+1) matrix
3563 : * each row corresponds to one example
3564 : * first NIn columns are inputs, last column stores class number (from 0 to
3565 : NClasses-1).
3566 :
3567 : -- ALGLIB --
3568 : Copyright 04.09.2012 by Bochkanov Sergey
3569 : *************************************************************************/
3570 0 : double mlperrorsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams)
3571 : {
3572 : jmp_buf _break_jump;
3573 : alglib_impl::ae_state _alglib_env_state;
3574 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3575 0 : if( setjmp(_break_jump) )
3576 : {
3577 : #if !defined(AE_NO_EXCEPTIONS)
3578 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3579 : #else
3580 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3581 : return 0;
3582 : #endif
3583 : }
3584 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3585 0 : if( _xparams.flags!=0x0 )
3586 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3587 0 : double result = alglib_impl::mlperrorsubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, &_alglib_env_state);
3588 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3589 0 : return *(reinterpret_cast<double*>(&result));
3590 : }
3591 :
3592 : /*************************************************************************
3593 : Error of the neural network on subset of sparse dataset.
3594 :
3595 : ! COMMERCIAL EDITION OF ALGLIB:
3596 : !
3597 : ! Commercial Edition of ALGLIB includes following important improvements
3598 : ! of this function:
3599 : ! * high-performance native backend with same C# interface (C# version)
3600 : ! * multithreading support (C++ and C# versions)
3601 : !
3602 : ! We recommend you to read 'Working with commercial version' section of
3603 : ! ALGLIB Reference Manual in order to find out how to use performance-
3604 : ! related features provided by commercial edition of ALGLIB.
3605 :
3606 : INPUT PARAMETERS:
3607 : Network - neural network;
3608 : XY - training set, see below for information on the
3609 : training set format. This function checks correctness
3610 : of the dataset (no NANs/INFs, class numbers are
3611 : correct) and throws exception when incorrect dataset
3612 : is passed. Sparse matrix must use CRS format for
3613 : storage.
3614 : SetSize - real size of XY, SetSize>=0;
3615 : it is used when SubsetSize<0;
3616 : Subset - subset of SubsetSize elements, array[SubsetSize];
3617 : SubsetSize- number of elements in Subset[] array:
3618 : * if SubsetSize>0, rows of XY with indices Subset[0]...
3619 : ...Subset[SubsetSize-1] are processed
3620 : * if SubsetSize=0, zeros are returned
3621 : * if SubsetSize<0, entire dataset is processed; Subset[]
3622 : array is ignored in this case.
3623 :
3624 : RESULT:
3625 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
3626 :
3627 : DATASET FORMAT:
3628 :
3629 : This function uses two different dataset formats - one for regression
3630 : networks, another one for classification networks.
3631 :
3632 : For regression networks with NIn inputs and NOut outputs the following
3633 : dataset format is used:
3634 : * dataset is given by NPoints*(NIn+NOut) matrix
3635 : * each row corresponds to one example
3636 : * first NIn columns are inputs, next NOut columns are outputs
3637 :
3638 : For classification networks with NIn inputs and NClasses classes the
3639 : following dataset format is used:
3640 : * dataset is given by NPoints*(NIn+1) matrix
3641 : * each row corresponds to one example
3642 : * first NIn columns are inputs, last column stores class number (from 0 to
3643 : NClasses-1).
3644 :
3645 : -- ALGLIB --
3646 : Copyright 04.09.2012 by Bochkanov Sergey
3647 : *************************************************************************/
3648 0 : double mlperrorsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams)
3649 : {
3650 : jmp_buf _break_jump;
3651 : alglib_impl::ae_state _alglib_env_state;
3652 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3653 0 : if( setjmp(_break_jump) )
3654 : {
3655 : #if !defined(AE_NO_EXCEPTIONS)
3656 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3657 : #else
3658 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3659 : return 0;
3660 : #endif
3661 : }
3662 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3663 0 : if( _xparams.flags!=0x0 )
3664 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3665 0 : double result = alglib_impl::mlperrorsparsesubset(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), setsize, const_cast<alglib_impl::ae_vector*>(subset.c_ptr()), subsetsize, &_alglib_env_state);
3666 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3667 0 : return *(reinterpret_cast<double*>(&result));
3668 : }
3669 : #endif
3670 :
3671 : #if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
3672 : /*************************************************************************
3673 : Multiclass Fisher LDA
3674 :
3675 : This subroutine finds the coefficients of a linear combination which
3676 : optimally separates the training set into classes.
3677 :
3678 : COMMERCIAL EDITION OF ALGLIB:
3679 :
3680 : ! Commercial version of ALGLIB includes two important improvements of
3681 : ! this function, which can be used from C++ and C#:
3682 : ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
3683 : ! * multithreading support
3684 : !
3685 : ! Intel MKL gives approximately constant (with respect to number of
3686 : ! worker threads) acceleration factor which depends on CPU being used,
3687 : ! problem size and "baseline" ALGLIB edition which is used for
3688 : ! comparison. Best results are achieved for high-dimensional problems
3689 : ! (NVars is at least 256).
3690 : !
3691 : ! Multithreading is used to accelerate initial phase of LDA, which
3692 : ! includes calculation of products of large matrices. Again, for best
3693 : ! efficiency problem must be high-dimensional.
3694 : !
3695 : ! Generally, commercial ALGLIB is several times faster than open-source
3696 : ! generic C edition, and many times faster than open-source C# edition.
3697 : !
3698 : ! We recommend you to read 'Working with commercial version' section of
3699 : ! ALGLIB Reference Manual in order to find out how to use performance-
3700 : ! related features provided by commercial edition of ALGLIB.
3701 :
3702 : INPUT PARAMETERS:
3703 : XY - training set, array[0..NPoints-1,0..NVars].
3704 : First NVars columns store values of independent
3705 : variables, next column stores number of class (from 0
3706 : to NClasses-1) which dataset element belongs to. Fractional
3707 : values are rounded to nearest integer.
3708 : NPoints - training set size, NPoints>=0
3709 : NVars - number of independent variables, NVars>=1
3710 : NClasses - number of classes, NClasses>=2
3711 :
3712 :
3713 : OUTPUT PARAMETERS:
3714 : Info - return code:
3715 : * -4, if internal EVD subroutine hasn't converged
3716 : * -2, if there is a point with class number
3717 : outside of [0..NClasses-1].
3718 :           * -1, if incorrect parameters were passed (NPoints<0,
3719 : NVars<1, NClasses<2)
3720 : * 1, if task has been solved
3721 :           *  2, if there was multicollinearity in the training set,
3722 :                 but the task has been solved.
3723 : W - linear combination coefficients, array[0..NVars-1]
3724 :
3725 : -- ALGLIB --
3726 : Copyright 31.05.2008 by Bochkanov Sergey
3727 : *************************************************************************/
3728 0 : void fisherlda(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_1d_array &w, const xparams _xparams)
3729 : {
3730 : jmp_buf _break_jump;
3731 : alglib_impl::ae_state _alglib_env_state;
3732 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3733 0 : if( setjmp(_break_jump) )
3734 : {
3735 : #if !defined(AE_NO_EXCEPTIONS)
3736 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3737 : #else
3738 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3739 : return;
3740 : #endif
3741 : }
3742 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3743 0 : if( _xparams.flags!=0x0 )
3744 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3745 0 : alglib_impl::fisherlda(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::ae_vector*>(w.c_ptr()), &_alglib_env_state);
3746 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3747 0 : return;
3748 : }
3749 :
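/*
Usage sketch (illustrative addition, not part of the original ALGLIB source):
Fisher LDA on a tiny two-class problem. Each row holds NVars=2 features plus
the class label in the last column; W receives the separating direction.
Data and the helper name are arbitrary.
*/
static void example_fisherlda()
{
    alglib::real_2d_array xy("[[1,1,0],[1,2,0],[5,5,1],[6,5,1]]");
    alglib::ae_int_t info;
    alglib::real_1d_array w;
    alglib::fisherlda(xy, 4, 2, 2, info, w);          // NPoints=4, NVars=2, NClasses=2
    // info==1: solved; info==2: solved despite multicollinearity; negative: error
}
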
3750 : /*************************************************************************
3751 : N-dimensional multiclass Fisher LDA
3752 :
3753 : This subroutine finds the coefficients of linear combinations which optimally
3754 : separate the training set into classes. It returns an N-dimensional basis
3755 : whose vectors are sorted by quality of training set separation (in descending order).
3756 :
3757 : ! COMMERCIAL EDITION OF ALGLIB:
3758 : !
3759 : ! Commercial Edition of ALGLIB includes following important improvements
3760 : ! of this function:
3761 : ! * high-performance native backend with same C# interface (C# version)
3762 : ! * multithreading support (C++ and C# versions)
3763 : ! * hardware vendor (Intel) implementations of linear algebra primitives
3764 : ! (C++ and C# versions, x86/x64 platform)
3765 : !
3766 : ! We recommend you to read 'Working with commercial version' section of
3767 : ! ALGLIB Reference Manual in order to find out how to use performance-
3768 : ! related features provided by commercial edition of ALGLIB.
3769 :
3770 : INPUT PARAMETERS:
3771 : XY - training set, array[0..NPoints-1,0..NVars].
3772 : First NVars columns store values of independent
3773 : variables, next column stores number of class (from 0
3774 : to NClasses-1) which dataset element belongs to. Fractional
3775 : values are rounded to nearest integer.
3776 : NPoints - training set size, NPoints>=0
3777 : NVars - number of independent variables, NVars>=1
3778 : NClasses - number of classes, NClasses>=2
3779 :
3780 :
3781 : OUTPUT PARAMETERS:
3782 : Info - return code:
3783 : * -4, if internal EVD subroutine hasn't converged
3784 : * -2, if there is a point with class number
3785 : outside of [0..NClasses-1].
3786 :           * -1, if incorrect parameters were passed (NPoints<0,
3787 : NVars<1, NClasses<2)
3788 : * 1, if task has been solved
3789 :           *  2, if there was multicollinearity in the training set,
3790 :                 but the task has been solved.
3791 : W - basis, array[0..NVars-1,0..NVars-1]
3792 :           columns of the matrix store basis vectors, sorted by
3793 : quality of training set separation (in descending order)
3794 :
3795 : -- ALGLIB --
3796 : Copyright 31.05.2008 by Bochkanov Sergey
3797 : *************************************************************************/
3798 0 : void fisherldan(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_2d_array &w, const xparams _xparams)
3799 : {
3800 : jmp_buf _break_jump;
3801 : alglib_impl::ae_state _alglib_env_state;
3802 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3803 0 : if( setjmp(_break_jump) )
3804 : {
3805 : #if !defined(AE_NO_EXCEPTIONS)
3806 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
3807 : #else
3808 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
3809 : return;
3810 : #endif
3811 : }
3812 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
3813 0 : if( _xparams.flags!=0x0 )
3814 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
3815 0 : alglib_impl::fisherldan(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::ae_matrix*>(w.c_ptr()), &_alglib_env_state);
3816 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
3817 0 : return;
3818 : }
3819 : #endif
3820 :
3821 : #if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
3822 : /*************************************************************************
3823 : This object stores state of the SSA model.
3824 :
3825 : You should use ALGLIB functions to work with this object.
3826 : *************************************************************************/
3827 0 : _ssamodel_owner::_ssamodel_owner()
3828 : {
3829 : jmp_buf _break_jump;
3830 : alglib_impl::ae_state _state;
3831 :
3832 0 : alglib_impl::ae_state_init(&_state);
3833 0 : if( setjmp(_break_jump) )
3834 : {
3835 0 : if( p_struct!=NULL )
3836 : {
3837 0 : alglib_impl::_ssamodel_destroy(p_struct);
3838 0 : alglib_impl::ae_free(p_struct);
3839 : }
3840 0 : p_struct = NULL;
3841 : #if !defined(AE_NO_EXCEPTIONS)
3842 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
3843 : #else
3844 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
3845 : return;
3846 : #endif
3847 : }
3848 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
3849 0 : p_struct = NULL;
3850 0 : p_struct = (alglib_impl::ssamodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::ssamodel), &_state);
3851 0 : memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
3852 0 : alglib_impl::_ssamodel_init(p_struct, &_state, ae_false);
3853 0 : ae_state_clear(&_state);
3854 0 : }
3855 :
3856 0 : _ssamodel_owner::_ssamodel_owner(const _ssamodel_owner &rhs)
3857 : {
3858 : jmp_buf _break_jump;
3859 : alglib_impl::ae_state _state;
3860 :
3861 0 : alglib_impl::ae_state_init(&_state);
3862 0 : if( setjmp(_break_jump) )
3863 : {
3864 0 : if( p_struct!=NULL )
3865 : {
3866 0 : alglib_impl::_ssamodel_destroy(p_struct);
3867 0 : alglib_impl::ae_free(p_struct);
3868 : }
3869 0 : p_struct = NULL;
3870 : #if !defined(AE_NO_EXCEPTIONS)
3871 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
3872 : #else
3873 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
3874 : return;
3875 : #endif
3876 : }
3877 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
3878 0 : p_struct = NULL;
3879 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ssamodel copy constructor failure (source is not initialized)", &_state);
3880 0 : p_struct = (alglib_impl::ssamodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::ssamodel), &_state);
3881 0 : memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
3882 0 : alglib_impl::_ssamodel_init_copy(p_struct, const_cast<alglib_impl::ssamodel*>(rhs.p_struct), &_state, ae_false);
3883 0 : ae_state_clear(&_state);
3884 0 : }
3885 :
3886 0 : _ssamodel_owner& _ssamodel_owner::operator=(const _ssamodel_owner &rhs)
3887 : {
3888 0 : if( this==&rhs )
3889 0 : return *this;
3890 : jmp_buf _break_jump;
3891 : alglib_impl::ae_state _state;
3892 :
3893 0 : alglib_impl::ae_state_init(&_state);
3894 0 : if( setjmp(_break_jump) )
3895 : {
3896 : #if !defined(AE_NO_EXCEPTIONS)
3897 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
3898 : #else
3899 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
3900 : return *this;
3901 : #endif
3902 : }
3903 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
3904 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: ssamodel assignment constructor failure (destination is not initialized)", &_state);
3905 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ssamodel assignment constructor failure (source is not initialized)", &_state);
3906 0 : alglib_impl::_ssamodel_destroy(p_struct);
3907 0 : memset(p_struct, 0, sizeof(alglib_impl::ssamodel));
3908 0 : alglib_impl::_ssamodel_init_copy(p_struct, const_cast<alglib_impl::ssamodel*>(rhs.p_struct), &_state, ae_false);
3909 0 : ae_state_clear(&_state);
3910 0 : return *this;
3911 : }
3912 :
3913 0 : _ssamodel_owner::~_ssamodel_owner()
3914 : {
3915 0 : if( p_struct!=NULL )
3916 : {
3917 0 : alglib_impl::_ssamodel_destroy(p_struct);
3918 0 : ae_free(p_struct);
3919 : }
3920 0 : }
3921 :
3922 0 : alglib_impl::ssamodel* _ssamodel_owner::c_ptr()
3923 : {
3924 0 : return p_struct;
3925 : }
3926 :
3927 0 : alglib_impl::ssamodel* _ssamodel_owner::c_ptr() const
3928 : {
3929 0 : return const_cast<alglib_impl::ssamodel*>(p_struct);
3930 : }
3931 0 : ssamodel::ssamodel() : _ssamodel_owner()
3932 : {
3933 0 : }
3934 :
3935 0 : ssamodel::ssamodel(const ssamodel &rhs):_ssamodel_owner(rhs)
3936 : {
3937 0 : }
3938 :
3939 0 : ssamodel& ssamodel::operator=(const ssamodel &rhs)
3940 : {
3941 0 : if( this==&rhs )
3942 0 : return *this;
3943 0 : _ssamodel_owner::operator=(rhs);
3944 0 : return *this;
3945 : }
3946 :
3947 0 : ssamodel::~ssamodel()
3948 : {
3949 0 : }
3950 :
3951 : /*************************************************************************
3952 : This function creates an SSA model object. Right after creation the model
3953 : is in "dummy" mode - you can add data, but analysis/prediction will
3954 : return just zeros (it assumes that the basis is empty).
3955 :
3956 : HOW TO USE SSA MODEL:
3957 :
3958 : 1. create model with ssacreate()
3959 : 2. add data with one/many ssaaddsequence() calls
3960 : 3. choose SSA algorithm with one of ssasetalgo...() functions:
3961 : * ssasetalgotopkdirect() for direct one-run analysis
3962 : * ssasetalgotopkrealtime() for algorithm optimized for many subsequent
3963 : runs with warm-start capabilities
3964 : * ssasetalgoprecomputed() for user-supplied basis
3965 : 4. set window width with ssasetwindow()
3966 : 5. perform one of the analysis-related activities:
3967 : a) call ssagetbasis() to get basis
3968 : b) call ssaanalyzelast(), ssaanalyzesequence() or ssaanalyzelastwindow()
3969 : to perform analysis (trend/noise separation)
3970 : c) call one of the forecasting functions (ssaforecastlast() or
3971 : ssaforecastsequence()) to perform prediction; alternatively, you can
3972 : extract linear recurrence coefficients with ssagetlrr().
3973 : SSA analysis will be performed during the first call to an analysis-
3974 : related function. The model tracks all changes in the dataset and the
3975 : settings, caches the previously computed basis, and re-evaluates it only
3976 : when necessary (a minimal sketch of the workflow follows this function).
3977 :
3978 : Additionally, if your setting involves a constant stream of incoming
3979 : data, you can quickly update an already calculated model with one of the
3980 : incremental append-and-update functions: ssaappendpointandupdate() or
3981 : ssaappendsequenceandupdate().
3982 :
3983 : NOTE: steps (2), (3), (4) can be performed in arbitrary order.
3984 :
3985 : INPUT PARAMETERS:
3986 : none
3987 :
3988 : OUTPUT PARAMETERS:
3989 : S - structure which stores model state
3990 :
3991 : -- ALGLIB --
3992 : Copyright 30.10.2017 by Bochkanov Sergey
3993 : *************************************************************************/
3994 0 : void ssacreate(ssamodel &s, const xparams _xparams)
3995 : {
3996 : jmp_buf _break_jump;
3997 : alglib_impl::ae_state _alglib_env_state;
3998 0 : alglib_impl::ae_state_init(&_alglib_env_state);
3999 0 : if( setjmp(_break_jump) )
4000 : {
4001 : #if !defined(AE_NO_EXCEPTIONS)
4002 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4003 : #else
4004 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4005 : return;
4006 : #endif
4007 : }
4008 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4009 0 : if( _xparams.flags!=0x0 )
4010 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4011 0 : alglib_impl::ssacreate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), &_alglib_env_state);
4012 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4013 0 : return;
4014 : }
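      :
      : /*************************************************************************
      : The minimal sketch below illustrates the workflow described above. It is
      : illustrative only: array contents and sizes are made up, and error
      : handling is omitted.
      :
      :     #include "dataanalysis.h"
      :     using namespace alglib;
      :
      :     void ssa_workflow_sketch()
      :     {
      :         ssamodel s;
      :         real_1d_array x = "[0.9,1.1,0.8,1.2,1.0,0.9,1.1,1.0]";
      :
      :         ssacreate(s);                // step 1: create model
      :         ssaaddsequence(s, x);        // step 2: add data
      :         ssasetalgotopkdirect(s, 2);  // step 3: choose algorithm (top-2)
      :         ssasetwindow(s, 4);          // step 4: set window width
      :
      :         // step 5: analysis; the basis is computed on this first call
      :         real_1d_array trend, noise;
      :         ssaanalyzelast(s, x.length(), trend, noise);
      :     }
      : *************************************************************************/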
4015 :
4016 : /*************************************************************************
4017 : This function sets window width for SSA model. You should call it before
4018 : the analysis phase. The default window width is 1 (not for real use).
4019 :
4020 : Special notes:
4021 : * this function call can be performed at any moment before first call to
4022 : analysis-related functions
4023 : * changing the window width invalidates the internally stored basis; if
4024 : you change the window width AFTER you call an analysis-related function,
4025 : the next analysis phase will require re-calculation of the basis
4026 : according to the current algorithm.
4027 : * calling this function with exactly the same window width as the current
4028 : one has no effect
4029 : * if you specify a window width larger than any data sequence stored in
4030 : the model, analysis will return a zero basis.
4031 :
4032 : INPUT PARAMETERS:
4033 : S - SSA model created with ssacreate()
4034 : WindowWidth - >=1, new window width
4035 :
4036 : OUTPUT PARAMETERS:
4037 : S - SSA model, updated
4038 :
4039 : -- ALGLIB --
4040 : Copyright 30.10.2017 by Bochkanov Sergey
4041 : *************************************************************************/
4042 0 : void ssasetwindow(const ssamodel &s, const ae_int_t windowwidth, const xparams _xparams)
4043 : {
4044 : jmp_buf _break_jump;
4045 : alglib_impl::ae_state _alglib_env_state;
4046 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4047 0 : if( setjmp(_break_jump) )
4048 : {
4049 : #if !defined(AE_NO_EXCEPTIONS)
4050 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4051 : #else
4052 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4053 : return;
4054 : #endif
4055 : }
4056 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4057 0 : if( _xparams.flags!=0x0 )
4058 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4059 0 : alglib_impl::ssasetwindow(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), windowwidth, &_alglib_env_state);
4060 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4061 0 : return;
4062 : }
4063 :
4064 : /*************************************************************************
4065 : This function sets the seed which is used to initialize the internal RNG
4066 : when we make pseudorandom decisions on model updates.
4067 :
4068 : By default, a deterministic seed is used, which results in the same
4069 : sequence of pseudorandom decisions every time you run the SSA model. If
4070 : you specify a non-deterministic seed value, then the SSA model may return
4071 : slightly different results after each run.
4072 :
4073 : This function can be useful when you have several SSA models updated with
4074 : ssaappendpointandupdate() called with 0<UpdateIts<1 (fractional value) and
4075 : due to performance limitations want them to perform updates at different
4076 : moments.
4077 :
4078 : INPUT PARAMETERS:
4079 : S - SSA model
4080 : Seed - seed:
4081 : * positive values = use deterministic seed for each run of
4082 : algorithms which depend on random initialization
4083 : * zero or negative values = use non-deterministic seed
4084 :
4085 : -- ALGLIB --
4086 : Copyright 03.11.2017 by Bochkanov Sergey
4087 : *************************************************************************/
4088 0 : void ssasetseed(const ssamodel &s, const ae_int_t seed, const xparams _xparams)
4089 : {
4090 : jmp_buf _break_jump;
4091 : alglib_impl::ae_state _alglib_env_state;
4092 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4093 0 : if( setjmp(_break_jump) )
4094 : {
4095 : #if !defined(AE_NO_EXCEPTIONS)
4096 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4097 : #else
4098 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4099 : return;
4100 : #endif
4101 : }
4102 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4103 0 : if( _xparams.flags!=0x0 )
4104 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4105 0 : alglib_impl::ssasetseed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), seed, &_alglib_env_state);
4106 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4107 0 : return;
4108 : }
4109 :
4110 : /*************************************************************************
4111 : This function sets length of power-up cycle for real-time algorithm.
4112 :
4113 : By default, this algorithm performs a costly O(N*WindowWidth^2) init
4114 : phase followed by a full run of truncated EVD. However, if you are ready
4115 : to live with a slightly lower-quality basis during the first few
4116 : iterations, you can split this O(N*WindowWidth^2) initialization across
4117 : several subsequent append-and-update rounds, which improves latency.
4118 :
4119 : This function invalidates the basis/solver; the next analysis call will
4120 : result in a full recalculation of everything.
4121 :
4122 : INPUT PARAMETERS:
4123 : S - SSA model
4124 : PWLen - length of the power-up stage:
4125 : * 0 means that no power-up is requested
4126 : * 1 is the same as 0
4127 : * >1 means that delayed power-up is performed
4128 :
4129 : -- ALGLIB --
4130 : Copyright 03.11.2017 by Bochkanov Sergey
4131 : *************************************************************************/
4132 0 : void ssasetpoweruplength(const ssamodel &s, const ae_int_t pwlen, const xparams _xparams)
4133 : {
4134 : jmp_buf _break_jump;
4135 : alglib_impl::ae_state _alglib_env_state;
4136 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4137 0 : if( setjmp(_break_jump) )
4138 : {
4139 : #if !defined(AE_NO_EXCEPTIONS)
4140 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4141 : #else
4142 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4143 : return;
4144 : #endif
4145 : }
4146 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4147 0 : if( _xparams.flags!=0x0 )
4148 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4149 0 : alglib_impl::ssasetpoweruplength(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), pwlen, &_alglib_env_state);
4150 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4151 0 : return;
4152 : }
4153 :
4154 : /*************************************************************************
4155 : This function sets memory limit of SSA analysis.
4156 :
4157 : Straightforward SSA with sequence length T and window width W needs O(T*W)
4158 : memory. It is possible to reduce memory consumption by splitting the
4159 : task into smaller chunks.
4160 :
4161 : This function allows you to specify an approximate memory limit (measured
4162 : in double precision numbers used for buffers). Actual memory consumption
4163 : will be comparable to the number you specify.
4164 :
4165 : The current default memory limit is 50,000,000 doubles (400 MB).
4166 :
4167 : INPUT PARAMETERS:
4168 : S - SSA model
4169 : MemLimit- memory limit, >=0. Zero value means no limit.
4170 :
4171 : -- ALGLIB --
4172 : Copyright 20.12.2017 by Bochkanov Sergey
4173 : *************************************************************************/
4174 0 : void ssasetmemorylimit(const ssamodel &s, const ae_int_t memlimit, const xparams _xparams)
4175 : {
4176 : jmp_buf _break_jump;
4177 : alglib_impl::ae_state _alglib_env_state;
4178 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4179 0 : if( setjmp(_break_jump) )
4180 : {
4181 : #if !defined(AE_NO_EXCEPTIONS)
4182 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4183 : #else
4184 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4185 : return;
4186 : #endif
4187 : }
4188 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4189 0 : if( _xparams.flags!=0x0 )
4190 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4191 0 : alglib_impl::ssasetmemorylimit(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), memlimit, &_alglib_env_state);
4192 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4193 0 : return;
4194 : }
4195 :
4196 : /*************************************************************************
4197 : This function adds data sequence to SSA model. Only single-dimensional
4198 : sequences are supported.
4199 :
4200 : What is a sequence? The following definitions/requirements apply:
4201 : * a sequence is an array of values measured in subsequent, equally
4202 : separated time moments (ticks).
4203 : * you may have many sequences in your dataset; say, one sequence may
4204 : correspond to one trading session.
4205 : * sequence length should be larger than current window length (shorter
4206 : sequences will be ignored during analysis).
4207 : * analysis is performed within a sequence; different sequences are NOT
4208 : stacked together to produce one large contiguous stream of data.
4209 : * analysis is performed for all sequences at once, i.e. same set of basis
4210 : vectors is computed for all sequences
4211 :
4212 : INCREMENTAL ANALYSIS
4213 :
4214 : This function is not intended for incremental updates of a previously
4215 : found SSA basis. Calling it invalidates all previous analysis results
4216 : (the basis is reset and recalculated from scratch at the next analysis).
4217 :
4218 : If you want to perform incremental/real-time SSA, consider using
4219 : following functions:
4220 : * ssaappendpointandupdate() for appending one point
4221 : * ssaappendsequenceandupdate() for appending new sequence
4222 :
4223 : INPUT PARAMETERS:
4224 : S - SSA model created with ssacreate()
4225 : X - array[N], data, can be larger (additional values
4226 : are ignored)
4227 : N - data length, can be automatically determined from
4228 : the array length. N>=0.
4229 :
4230 : OUTPUT PARAMETERS:
4231 : S - SSA model, updated
4232 :
4233 : NOTE: you can clear dataset with ssacleardata()
4234 :
4235 : -- ALGLIB --
4236 : Copyright 30.10.2017 by Bochkanov Sergey
4237 : *************************************************************************/
4238 0 : void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const ae_int_t n, const xparams _xparams)
4239 : {
4240 : jmp_buf _break_jump;
4241 : alglib_impl::ae_state _alglib_env_state;
4242 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4243 0 : if( setjmp(_break_jump) )
4244 : {
4245 : #if !defined(AE_NO_EXCEPTIONS)
4246 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4247 : #else
4248 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4249 : return;
4250 : #endif
4251 : }
4252 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4253 0 : if( _xparams.flags!=0x0 )
4254 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4255 0 : alglib_impl::ssaaddsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, &_alglib_env_state);
4256 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4257 0 : return;
4258 : }
4259 :
4260 : /*************************************************************************
4261 : This function adds data sequence to SSA model. Only single-dimensional
4262 : sequences are supported.
4263 :
4264 : What is a sequence? The following definitions/requirements apply:
4265 : * a sequence is an array of values measured in subsequent, equally
4266 : separated time moments (ticks).
4267 : * you may have many sequences in your dataset; say, one sequence may
4268 : correspond to one trading session.
4269 : * sequence length should be larger than current window length (shorter
4270 : sequences will be ignored during analysis).
4271 : * analysis is performed within a sequence; different sequences are NOT
4272 : stacked together to produce one large contiguous stream of data.
4273 : * analysis is performed for all sequences at once, i.e. same set of basis
4274 : vectors is computed for all sequences
4275 :
4276 : INCREMENTAL ANALYSIS
4277 :
4278 : This function is not intended for incremental updates of a previously
4279 : found SSA basis. Calling it invalidates all previous analysis results
4280 : (the basis is reset and recalculated from scratch at the next analysis).
4281 :
4282 : If you want to perform incremental/real-time SSA, consider using
4283 : following functions:
4284 : * ssaappendpointandupdate() for appending one point
4285 : * ssaappendsequenceandupdate() for appending new sequence
4286 :
4287 : INPUT PARAMETERS:
4288 : S - SSA model created with ssacreate()
4289 : X - array[N], data, can be larger (additional values
4290 : are ignored)
4291 : N - data length, can be automatically determined from
4292 : the array length. N>=0.
4293 :
4294 : OUTPUT PARAMETERS:
4295 : S - SSA model, updated
4296 :
4297 : NOTE: you can clear dataset with ssacleardata()
4298 :
4299 : -- ALGLIB --
4300 : Copyright 30.10.2017 by Bochkanov Sergey
4301 : *************************************************************************/
4302 : #if !defined(AE_NO_EXCEPTIONS)
4303 0 : void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const xparams _xparams)
4304 : {
4305 : jmp_buf _break_jump;
4306 : alglib_impl::ae_state _alglib_env_state;
4307 : ae_int_t n;
4308 :
4309 0 : n = x.length();
4310 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4311 0 : if( setjmp(_break_jump) )
4312 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4313 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4314 0 : if( _xparams.flags!=0x0 )
4315 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4316 0 : alglib_impl::ssaaddsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, &_alglib_env_state);
4317 :
4318 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4319 0 : return;
4320 : }
4321 : #endif
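      :
      : /*************************************************************************
      : A short sketch (values are illustrative) of loading several independent
      : sequences - say, one per trading session - before analysis:
      :
      :     ssamodel s;
      :     ssacreate(s);
      :     real_1d_array day1 = "[1.0,1.2,0.9,1.1,1.0]";
      :     real_1d_array day2 = "[0.8,1.0,1.1,0.9,1.0,1.2]";
      :     ssaaddsequence(s, day1);  // analyzed separately from day2,
      :     ssaaddsequence(s, day2);  // but sharing one set of basis vectors
      : *************************************************************************/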
4322 :
4323 : /*************************************************************************
4324 : This function appends single point to last data sequence stored in the SSA
4325 : model and tries to update model in the incremental manner (if possible
4326 : with current algorithm).
4327 :
4328 : If you want to add more than one point at once:
4329 : * if you want to add M points to the same sequence, perform M-1 calls with
4330 : UpdateIts parameter set to 0.0, and last call with non-zero UpdateIts.
4331 : * if you want to add new sequence, use ssaappendsequenceandupdate()
4332 :
4333 : Running time of this function does NOT depend on dataset size, only on
4334 : window width and number of singular vectors. Depending on algorithm being
4335 : used, incremental update has complexity:
4336 : * for top-K real time - O(UpdateIts*K*Width^2), with fractional UpdateIts
4337 : * for top-K direct - O(Width^3) for any non-zero UpdateIts
4338 : * for precomputed basis - O(1), no update is performed
4339 :
4340 : INPUT PARAMETERS:
4341 : S - SSA model created with ssacreate()
4342 : X - new point
4343 : UpdateIts - >=0, floating point (!) value, desired update
4344 : frequency:
4345 : * zero value means that point is stored, but no
4346 : update is performed
4347 : * integer part of the value means that specified
4348 : number of iterations is always performed
4349 : * fractional part of the value means that one
4350 : iteration is performed with this probability.
4351 :
4352 : Recommended value: 0<UpdateIts<=1. Values larger
4353 : than 1 are VERY seldom needed. If your dataset
4354 : changes slowly, you can set it to 0.1 and skip
4355 : 90% of updates.
4356 :
4357 : In any case, no information is lost even with zero
4358 : value of UpdateIts! It will be incorporated into
4359 : model, sooner or later.
4360 :
4361 : OUTPUT PARAMETERS:
4362 : S - SSA model, updated
4363 :
4364 : NOTE: this function uses internal RNG to handle fractional values of
4365 : UpdateIts. By default it is initialized with fixed seed during
4366 : initial calculation of basis. Thus subsequent calls to this function
4367 : will result in the same sequence of pseudorandom decisions.
4368 :
4369 : However, if you have several SSA models which are calculated
4370 : simultaneously, and if you want to reduce computational bottlenecks
4371 : by performing random updates at random moments, then fixed seed is
4372 : not an option - all updates will fire at same moments.
4373 :
4374 : You may change it with ssasetseed() function.
4375 :
4376 : NOTE: this function throws an exception if called for empty dataset (there
4377 : is no "last" sequence to modify).
4378 :
4379 : -- ALGLIB --
4380 : Copyright 30.10.2017 by Bochkanov Sergey
4381 : *************************************************************************/
4382 0 : void ssaappendpointandupdate(const ssamodel &s, const double x, const double updateits, const xparams _xparams)
4383 : {
4384 : jmp_buf _break_jump;
4385 : alglib_impl::ae_state _alglib_env_state;
4386 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4387 0 : if( setjmp(_break_jump) )
4388 : {
4389 : #if !defined(AE_NO_EXCEPTIONS)
4390 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4391 : #else
4392 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4393 : return;
4394 : #endif
4395 : }
4396 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4397 0 : if( _xparams.flags!=0x0 )
4398 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4399 0 : alglib_impl::ssaappendpointandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), x, updateits, &_alglib_env_state);
4400 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4401 0 : return;
4402 : }
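      :
      : /*************************************************************************
      : A streaming sketch, assuming the model S was already configured and
      : analyzed; next_tick() stands for a hypothetical user-supplied data
      : source. On average one update iteration is performed per ten appended
      : ticks:
      :
      :     for(int i=0; i<1000; i++)
      :         ssaappendpointandupdate(s, next_tick(), 0.1);
      : *************************************************************************/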
4403 :
4404 : /*************************************************************************
4405 : This function appends new sequence to dataset stored in the SSA model and
4406 : tries to update model in the incremental manner (if possible with current
4407 : algorithm).
4408 :
4409 : Notes:
4410 : * if you want to add M sequences at once, perform M-1 calls with UpdateIts
4411 : parameter set to 0.0, and last call with non-zero UpdateIts.
4412 : * if you want to add just one point, use ssaappendpointandupdate()
4413 :
4414 : Running time of this function does NOT depend on dataset size, only on
4415 : sequence length, window width and number of singular vectors. Depending on
4416 : algorithm being used, incremental update has complexity:
4417 : * for top-K real time - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
4418 : * for top-K direct - O(Width^3+(NTicks-Width)*Width^2)
4419 : * for precomputed basis - O(1), no update is performed
4420 :
4421 : INPUT PARAMETERS:
4422 : S - SSA model created with ssacreate()
4423 : X - new sequence, array[NTicks] or larger
4424 : NTicks - >=1, number of ticks in the sequence
4425 : UpdateIts - >=0, floating point (!) value, desired update
4426 : frequency:
4427 : * zero value means that point is stored, but no
4428 : update is performed
4429 : * integer part of the value means that specified
4430 : number of iterations is always performed
4431 : * fractional part of the value means that one
4432 : iteration is performed with this probability.
4433 :
4434 : Recommended value: 0<UpdateIts<=1. Values larger
4435 : than 1 are VERY seldom needed. If your dataset
4436 : changes slowly, you can set it to 0.1 and skip
4437 : 90% of updates.
4438 :
4439 : In any case, no information is lost even with zero
4440 : value of UpdateIts! It will be incorporated into
4441 : model, sooner or later.
4442 :
4443 : OUTPUT PARAMETERS:
4444 : S - SSA model, updated
4445 :
4446 : NOTE: this function uses internal RNG to handle fractional values of
4447 : UpdateIts. By default it is initialized with fixed seed during
4448 : initial calculation of basis. Thus subsequent calls to this function
4449 : will result in the same sequence of pseudorandom decisions.
4450 :
4451 : However, if you have several SSA models which are calculated
4452 : simultaneously, and if you want to reduce computational bottlenecks
4453 : by performing random updates at random moments, then fixed seed is
4454 : not an option - all updates will fire at same moments.
4455 :
4456 : You may change it with ssasetseed() function.
4457 :
4458 : -- ALGLIB --
4459 : Copyright 30.10.2017 by Bochkanov Sergey
4460 : *************************************************************************/
4461 0 : void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const ae_int_t nticks, const double updateits, const xparams _xparams)
4462 : {
4463 : jmp_buf _break_jump;
4464 : alglib_impl::ae_state _alglib_env_state;
4465 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4466 0 : if( setjmp(_break_jump) )
4467 : {
4468 : #if !defined(AE_NO_EXCEPTIONS)
4469 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4470 : #else
4471 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4472 : return;
4473 : #endif
4474 : }
4475 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4476 0 : if( _xparams.flags!=0x0 )
4477 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4478 0 : alglib_impl::ssaappendsequenceandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), nticks, updateits, &_alglib_env_state);
4479 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4480 0 : return;
4481 : }
4482 :
4483 : /*************************************************************************
4484 : This function appends new sequence to dataset stored in the SSA model and
4485 : tries to update model in the incremental manner (if possible with current
4486 : algorithm).
4487 :
4488 : Notes:
4489 : * if you want to add M sequences at once, perform M-1 calls with UpdateIts
4490 : parameter set to 0.0, and last call with non-zero UpdateIts.
4491 : * if you want to add just one point, use ssaappendpointandupdate()
4492 :
4493 : Running time of this function does NOT depend on dataset size, only on
4494 : sequence length, window width and number of singular vectors. Depending on
4495 : algorithm being used, incremental update has complexity:
4496 : * for top-K real time - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
4497 : * for top-K direct - O(Width^3+(NTicks-Width)*Width^2)
4498 : * for precomputed basis - O(1), no update is performed
4499 :
4500 : INPUT PARAMETERS:
4501 : S - SSA model created with ssacreate()
4502 : X - new sequence, array[NTicks] or larger
4503 : NTicks - >=1, number of ticks in the sequence
4504 : UpdateIts - >=0, floating point (!) value, desired update
4505 : frequency:
4506 : * zero value means that point is stored, but no
4507 : update is performed
4508 : * integer part of the value means that specified
4509 : number of iterations is always performed
4510 : * fractional part of the value means that one
4511 : iteration is performed with this probability.
4512 :
4513 : Recommended value: 0<UpdateIts<=1. Values larger
4514 : than 1 are VERY seldom needed. If your dataset
4515 : changes slowly, you can set it to 0.1 and skip
4516 : 90% of updates.
4517 :
4518 : In any case, no information is lost even with zero
4519 : value of UpdateIts! It will be incorporated into
4520 : model, sooner or later.
4521 :
4522 : OUTPUT PARAMETERS:
4523 : S - SSA model, updated
4524 :
4525 : NOTE: this function uses internal RNG to handle fractional values of
4526 : UpdateIts. By default it is initialized with fixed seed during
4527 : initial calculation of basis. Thus subsequent calls to this function
4528 : will result in the same sequence of pseudorandom decisions.
4529 :
4530 : However, if you have several SSA models which are calculated
4531 : simultaneously, and if you want to reduce computational bottlenecks
4532 : by performing random updates at random moments, then fixed seed is
4533 : not an option - all updates will fire at same moments.
4534 :
4535 : You may change it with ssasetseed() function.
4536 :
4537 : -- ALGLIB --
4538 : Copyright 30.10.2017 by Bochkanov Sergey
4539 : *************************************************************************/
4540 : #if !defined(AE_NO_EXCEPTIONS)
4541 0 : void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const double updateits, const xparams _xparams)
4542 : {
4543 : jmp_buf _break_jump;
4544 : alglib_impl::ae_state _alglib_env_state;
4545 : ae_int_t nticks;
4546 :
4547 0 : nticks = x.length();
4548 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4549 0 : if( setjmp(_break_jump) )
4550 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4551 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4552 0 : if( _xparams.flags!=0x0 )
4553 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4554 0 : alglib_impl::ssaappendsequenceandupdate(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), nticks, updateits, &_alglib_env_state);
4555 :
4556 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4557 0 : return;
4558 : }
4559 : #endif
4560 :
4561 : /*************************************************************************
4562 : This function sets SSA algorithm to "precomputed vectors" algorithm.
4563 :
4564 : This algorithm uses a precomputed set of orthonormal (orthogonal AND
4565 : normalized) basis vectors supplied by the user. Thus, the basis
4566 : calculation phase is not performed - we already have our basis - and
4567 : only the analysis/forecasting phase requires actual calculations.
4568 :
4569 : This algorithm may handle "append" requests which add just one/few ticks
4570 : to the end of the last sequence in O(1) time.
4571 :
4572 : NOTE: this algorithm accepts both basis and window width, because these
4573 : two parameters are naturally aligned. Calling this function sets
4574 : window width; if you call ssasetwindow() with a different window width,
4575 : then during the analysis stage the algorithm will detect the conflict
4576 : and reset to a zero basis.
4577 :
4578 : INPUT PARAMETERS:
4579 : S - SSA model
4580 : A - array[WindowWidth,NBasis], orthonormalized basis;
4581 : this function does NOT control orthogonality and
4582 : does NOT perform any kind of renormalization. It
4583 : is your responsibility to provide it with correct
4584 : basis.
4585 : WindowWidth - window width, >=1
4586 : NBasis - number of basis vectors, 1<=NBasis<=WindowWidth
4587 :
4588 : OUTPUT PARAMETERS:
4589 : S - updated model
4590 :
4591 : NOTE: calling this function invalidates basis in all cases.
4592 :
4593 : -- ALGLIB --
4594 : Copyright 30.10.2017 by Bochkanov Sergey
4595 : *************************************************************************/
4596 0 : void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const ae_int_t windowwidth, const ae_int_t nbasis, const xparams _xparams)
4597 : {
4598 : jmp_buf _break_jump;
4599 : alglib_impl::ae_state _alglib_env_state;
4600 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4601 0 : if( setjmp(_break_jump) )
4602 : {
4603 : #if !defined(AE_NO_EXCEPTIONS)
4604 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4605 : #else
4606 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4607 : return;
4608 : #endif
4609 : }
4610 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4611 0 : if( _xparams.flags!=0x0 )
4612 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4613 0 : alglib_impl::ssasetalgoprecomputed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), windowwidth, nbasis, &_alglib_env_state);
4614 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4615 0 : return;
4616 : }
4617 :
4618 : /*************************************************************************
4619 : This function sets SSA algorithm to "precomputed vectors" algorithm.
4620 :
4621 : This algorithm uses a precomputed set of orthonormal (orthogonal AND
4622 : normalized) basis vectors supplied by the user. Thus, the basis
4623 : calculation phase is not performed - we already have our basis - and
4624 : only the analysis/forecasting phase requires actual calculations.
4625 :
4626 : This algorithm may handle "append" requests which add just one/few ticks
4627 : to the end of the last sequence in O(1) time.
4628 :
4629 : NOTE: this algorithm accepts both basis and window width, because these
4630 : two parameters are naturally aligned. Calling this function sets
4631 : window width; if you call ssasetwindow() with a different window width,
4632 : then during the analysis stage the algorithm will detect the conflict
4633 : and reset to a zero basis.
4634 :
4635 : INPUT PARAMETERS:
4636 : S - SSA model
4637 : A - array[WindowWidth,NBasis], orthonormalized basis;
4638 : this function does NOT control orthogonality and
4639 : does NOT perform any kind of renormalization. It
4640 : is your responsibility to provide it with correct
4641 : basis.
4642 : WindowWidth - window width, >=1
4643 : NBasis - number of basis vectors, 1<=NBasis<=WindowWidth
4644 :
4645 : OUTPUT PARAMETERS:
4646 : S - updated model
4647 :
4648 : NOTE: calling this function invalidates basis in all cases.
4649 :
4650 : -- ALGLIB --
4651 : Copyright 30.10.2017 by Bochkanov Sergey
4652 : *************************************************************************/
4653 : #if !defined(AE_NO_EXCEPTIONS)
4654 0 : void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const xparams _xparams)
4655 : {
4656 : jmp_buf _break_jump;
4657 : alglib_impl::ae_state _alglib_env_state;
4658 : ae_int_t windowwidth;
4659 : ae_int_t nbasis;
4660 :
4661 0 : windowwidth = a.rows();
4662 0 : nbasis = a.cols();
4663 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4664 0 : if( setjmp(_break_jump) )
4665 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4666 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4667 0 : if( _xparams.flags!=0x0 )
4668 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4669 0 : alglib_impl::ssasetalgoprecomputed(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), windowwidth, nbasis, &_alglib_env_state);
4670 :
4671 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4672 0 : return;
4673 : }
4674 : #endif
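      :
      : /*************************************************************************
      : A sketch of supplying a user-defined basis. The two columns below
      : (WindowWidth=4, NBasis=2) are trivial unit vectors chosen only for
      : illustration; a real application would pass a basis obtained elsewhere:
      :
      :     real_2d_array a = "[[1,0],[0,1],[0,0],[0,0]]";
      :     ssasetalgoprecomputed(s, a);  // WindowWidth/NBasis taken from A
      : *************************************************************************/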
4675 :
4676 : /*************************************************************************
4677 : This function sets SSA algorithm to "direct top-K" algorithm.
4678 :
4679 : "Direct top-K" algorithm performs full SVD of the N*WINDOW trajectory
4680 : matrix (hence its name - direct solver is used), then extracts top K
4681 : components. Overall running time is O(N*WINDOW^2), where N is a number of
4682 : ticks in the dataset, WINDOW is window width.
4683 :
4684 : This algorithm may handle "append" requests which add just one/few ticks
4685 : to the end of the last sequence in O(WINDOW^3) time, which is ~N/WINDOW
4686 : times faster than re-computing everything from scratch.
4687 :
4688 : INPUT PARAMETERS:
4689 : S - SSA model
4690 : TopK - number of components to analyze; TopK>=1.
4691 :
4692 : OUTPUT PARAMETERS:
4693 : S - updated model
4694 :
4695 :
4696 : NOTE: TopK greater than WindowWidth is silently decreased to WindowWidth
4697 : during the analysis phase
4698 :
4699 : NOTE: calling this function invalidates basis, except for the situation
4700 : when this algorithm was already set with same parameters.
4701 :
4702 : -- ALGLIB --
4703 : Copyright 30.10.2017 by Bochkanov Sergey
4704 : *************************************************************************/
4705 0 : void ssasetalgotopkdirect(const ssamodel &s, const ae_int_t topk, const xparams _xparams)
4706 : {
4707 : jmp_buf _break_jump;
4708 : alglib_impl::ae_state _alglib_env_state;
4709 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4710 0 : if( setjmp(_break_jump) )
4711 : {
4712 : #if !defined(AE_NO_EXCEPTIONS)
4713 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4714 : #else
4715 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4716 : return;
4717 : #endif
4718 : }
4719 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4720 0 : if( _xparams.flags!=0x0 )
4721 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4722 0 : alglib_impl::ssasetalgotopkdirect(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), topk, &_alglib_env_state);
4723 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4724 0 : return;
4725 : }
4726 :
4727 : /*************************************************************************
4728 : This function sets SSA algorithm to "top-K real time algorithm". This algo
4729 : extracts K components with largest singular values.
4730 :
4731 : It is a real-time version of the top-K algorithm, optimized for
4732 : incremental processing and fast start-up. Internally it uses a subspace
4733 : eigensolver for truncated SVD, enabling quick updates of the basis when
4734 : only a few points/sequences are added to the dataset.
4735 :
4736 : Performance profile of the algorithm is given below:
4737 : * O(K*WindowWidth^2) running time for incremental update of the dataset
4738 : with one of the "append-and-update" functions (ssaappendpointandupdate()
4739 : or ssaappendsequenceandupdate()).
4740 : * O(N*WindowWidth^2) running time for initial basis evaluation (N=size of
4741 : dataset)
4742 : * ability to split costly initialization across several incremental
4743 : updates of the basis (so called "Power-Up" functionality, activated by
4744 : ssasetpoweruplength() function)
4745 :
4746 : INPUT PARAMETERS:
4747 : S - SSA model
4748 : TopK - number of components to analyze; TopK>=1.
4749 :
4750 : OUTPUT PARAMETERS:
4751 : S - updated model
4752 :
4753 : NOTE: this algorithm is optimized for large-scale tasks with large
4754 : datasets. On toy problems with just 5-10 points it can return basis
4755 : which is slightly different from that returned by direct algorithm
4756 : (ssasetalgotopkdirect() function). However, the difference becomes
4757 : negligible as dataset grows.
4758 :
4759 : NOTE: TopK greater than WindowWidth is silently decreased to WindowWidth
4760 : during the analysis phase
4761 :
4762 : NOTE: calling this function invalidates basis, except for the situation
4763 : when this algorithm was already set with same parameters.
4764 :
4765 : -- ALGLIB --
4766 : Copyright 30.10.2017 by Bochkanov Sergey
4767 : *************************************************************************/
4768 0 : void ssasetalgotopkrealtime(const ssamodel &s, const ae_int_t topk, const xparams _xparams)
4769 : {
4770 : jmp_buf _break_jump;
4771 : alglib_impl::ae_state _alglib_env_state;
4772 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4773 0 : if( setjmp(_break_jump) )
4774 : {
4775 : #if !defined(AE_NO_EXCEPTIONS)
4776 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4777 : #else
4778 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4779 : return;
4780 : #endif
4781 : }
4782 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4783 0 : if( _xparams.flags!=0x0 )
4784 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4785 0 : alglib_impl::ssasetalgotopkrealtime(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), topk, &_alglib_env_state);
4786 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4787 0 : return;
4788 : }
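      :
      : /*************************************************************************
      : A sketch combining the real-time algorithm with delayed power-up, so
      : that the costly initialization is spread across the first updates;
      : next_tick() and have_data() are hypothetical user-supplied routines:
      :
      :     ssasetalgotopkrealtime(s, 2);  // track top-2 components
      :     ssasetpoweruplength(s, 10);    // split init across ~10 updates
      :     while( have_data() )
      :         ssaappendpointandupdate(s, next_tick(), 1.0);
      : *************************************************************************/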
4789 :
4790 : /*************************************************************************
4791 : This function clears all data stored in the model and invalidates all
4792 : basis components found so far.
4793 :
4794 : INPUT PARAMETERS:
4795 : S - SSA model created with ssacreate()
4796 :
4797 : OUTPUT PARAMETERS:
4798 : S - SSA model, updated
4799 :
4800 : -- ALGLIB --
4801 : Copyright 30.10.2017 by Bochkanov Sergey
4802 : *************************************************************************/
4803 0 : void ssacleardata(const ssamodel &s, const xparams _xparams)
4804 : {
4805 : jmp_buf _break_jump;
4806 : alglib_impl::ae_state _alglib_env_state;
4807 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4808 0 : if( setjmp(_break_jump) )
4809 : {
4810 : #if !defined(AE_NO_EXCEPTIONS)
4811 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4812 : #else
4813 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4814 : return;
4815 : #endif
4816 : }
4817 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4818 0 : if( _xparams.flags!=0x0 )
4819 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4820 0 : alglib_impl::ssacleardata(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), &_alglib_env_state);
4821 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4822 0 : return;
4823 : }
4824 :
4825 : /*************************************************************************
4826 : This function executes SSA on internally stored dataset and returns basis
4827 : found by current method.
4828 :
4829 : INPUT PARAMETERS:
4830 : S - SSA model
4831 :
4832 : OUTPUT PARAMETERS:
4833 : A - array[WindowWidth,NBasis], basis; vectors are
4834 : stored in matrix columns, in order of decreasing variance
4835 : SV - array[NBasis]:
4836 : * zeros - for model initialized with SSASetAlgoPrecomputed()
4837 : * singular values - for other algorithms
4838 : WindowWidth - current window
4839 : NBasis - basis size
4840 :
4841 :
4842 : CACHING/REUSE OF THE BASIS
4843 :
4844 : Caching/reuse of previous results is performed:
4845 : * first call performs full run of SSA; basis is stored in the cache
4846 : * subsequent calls reuse previously cached basis
4847 : * if you call any function which changes model properties (window length,
4848 : algorithm, dataset), internal basis will be invalidated.
4849 : * the only calls which do NOT invalidate basis are listed below:
4850 : a) ssasetwindow() with same window length
4851 : b) ssaappendpointandupdate()
4852 : c) ssaappendsequenceandupdate()
4853 : d) ssasetalgotopk...() with exactly same K
4854 : Calling these functions will result in reuse of previously found basis.
4855 :
4856 :
4857 : HANDLING OF DEGENERATE CASES
4858 :
4859 : Calling this function in degenerate cases (no data or all data are
4860 : shorter than window size; no algorithm is specified) returns a basis with
4861 : just one zero vector.
4862 :
4863 : -- ALGLIB --
4864 : Copyright 30.10.2017 by Bochkanov Sergey
4865 : *************************************************************************/
4866 0 : void ssagetbasis(const ssamodel &s, real_2d_array &a, real_1d_array &sv, ae_int_t &windowwidth, ae_int_t &nbasis, const xparams _xparams)
4867 : {
4868 : jmp_buf _break_jump;
4869 : alglib_impl::ae_state _alglib_env_state;
4870 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4871 0 : if( setjmp(_break_jump) )
4872 : {
4873 : #if !defined(AE_NO_EXCEPTIONS)
4874 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4875 : #else
4876 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4877 : return;
4878 : #endif
4879 : }
4880 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4881 0 : if( _xparams.flags!=0x0 )
4882 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4883 0 : alglib_impl::ssagetbasis(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), const_cast<alglib_impl::ae_vector*>(sv.c_ptr()), &windowwidth, &nbasis, &_alglib_env_state);
4884 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4885 0 : return;
4886 : }
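      :
      : /*************************************************************************
      : A sketch of retrieving the cached basis after analysis; sizes follow the
      : conventions stated above:
      :
      :     real_2d_array a;
      :     real_1d_array sv;
      :     ae_int_t w, nbasis;
      :     ssagetbasis(s, a, sv, w, nbasis);
      :     // a is w x nbasis with basis vectors in columns; sv[i] is the
      :     // singular value of column i (zeros for a precomputed basis)
      : *************************************************************************/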
4887 :
4888 : /*************************************************************************
4889 : This function returns linear recurrence relation (LRR) coefficients found
4890 : by current SSA algorithm.
4891 :
4892 : INPUT PARAMETERS:
4893 : S - SSA model
4894 :
4895 : OUTPUT PARAMETERS:
4896 : A - array[WindowWidth-1]. Coefficients of the
4897 : linear recurrence of the form:
4898 : X[W-1] = X[W-2]*A[W-2] + X[W-3]*A[W-3] + ... + X[0]*A[0].
4899 : Empty array for WindowWidth=1.
4900 : WindowWidth - current window width
4901 :
4902 :
4903 : CACHING/REUSE OF THE BASIS
4904 :
4905 : Caching/reuse of previous results is performed:
4906 : * first call performs full run of SSA; basis is stored in the cache
4907 : * subsequent calls reuse previously cached basis
4908 : * if you call any function which changes model properties (window length,
4909 : algorithm, dataset), internal basis will be invalidated.
4910 : * the only calls which do NOT invalidate basis are listed below:
4911 : a) ssasetwindow() with same window length
4912 : b) ssaappendpointandupdate()
4913 : c) ssaappendsequenceandupdate()
4914 : d) ssasetalgotopk...() with exactly same K
4915 : Calling these functions will result in reuse of previously found basis.
4916 :
4917 :
4918 : HANDLING OF DEGENERATE CASES
4919 :
4920 : Calling this function in degenerate cases (no data or all data are
4921 : shorter than window size; no algorithm is specified) returns zeros.
4922 :
4923 : -- ALGLIB --
4924 : Copyright 30.10.2017 by Bochkanov Sergey
4925 : *************************************************************************/
4926 0 : void ssagetlrr(const ssamodel &s, real_1d_array &a, ae_int_t &windowwidth, const xparams _xparams)
4927 : {
4928 : jmp_buf _break_jump;
4929 : alglib_impl::ae_state _alglib_env_state;
4930 0 : alglib_impl::ae_state_init(&_alglib_env_state);
4931 0 : if( setjmp(_break_jump) )
4932 : {
4933 : #if !defined(AE_NO_EXCEPTIONS)
4934 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
4935 : #else
4936 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
4937 : return;
4938 : #endif
4939 : }
4940 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
4941 0 : if( _xparams.flags!=0x0 )
4942 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
4943 0 : alglib_impl::ssagetlrr(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(a.c_ptr()), &windowwidth, &_alglib_env_state);
4944 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
4945 0 : return;
4946 : }
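      :
      : /*************************************************************************
      : A sketch (illustrative) of applying the returned LRR coefficients by
      : hand to predict one tick ahead, following the recurrence stated above;
      : x is assumed to hold the W-1 most recent ticks in chronological order:
      :
      :     real_1d_array a;
      :     ae_int_t w;
      :     ssagetlrr(s, a, w);
      :     double pred = 0;
      :     for(ae_int_t j=0; j<=w-2; j++)
      :         pred += x[j]*a[j];  // X[W-1] = X[0]*A[0] + ... + X[W-2]*A[W-2]
      : *************************************************************************/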
4947 :
4948 : /*************************************************************************
4949 : This function executes SSA on internally stored dataset and returns
4950 : analysis for the last window of the last sequence. Such analysis is
4951 : a lightweight alternative to full-scale reconstruction (see below).
4952 :
4953 : The typical use case for this function is a real-time setting, when you
4954 : are interested in quick-and-dirty (very quick and very dirty) processing
4955 : of just the few last ticks of the trend.
4956 :
4957 : IMPORTANT: full scale SSA involves analysis of the ENTIRE dataset,
4958 : with reconstruction being done for all positions of sliding
4959 : window with subsequent hankelization (diagonal averaging) of
4960 : the resulting matrix.
4961 :
4962 : Such analysis requires O((DataLen-Window)*Window*NBasis) FLOPs
4963 : and can be quite costly. However, it has nice noise-canceling
4964 : effects due to averaging.
4965 :
4966 : This function performs REDUCED analysis of the last window. It
4967 : is much faster - just O(Window*NBasis), but its results are
4968 : DIFFERENT from that of ssaanalyzelast(). In particular, first
4969 : few points of the trend are much more prone to noise.
4970 :
4971 : INPUT PARAMETERS:
4972 : S - SSA model
4973 :
4974 : OUTPUT PARAMETERS:
4975 : Trend - array[WindowSize], reconstructed trend line
4976 : Noise - array[WindowSize], the rest of the signal;
4977 : it holds that ActualData = Trend+Noise.
4978 : NTicks - current WindowSize
4979 :
4980 :
4981 : CACHING/REUSE OF THE BASIS
4982 :
4983 : Caching/reuse of previous results is performed:
4984 : * first call performs full run of SSA; basis is stored in the cache
4985 : * subsequent calls reuse previously cached basis
4986 : * if you call any function which changes model properties (window length,
4987 : algorithm, dataset), internal basis will be invalidated.
4988 : * the only calls which do NOT invalidate basis are listed below:
4989 : a) ssasetwindow() with same window length
4990 : b) ssaappendpointandupdate()
4991 : c) ssaappendsequenceandupdate()
4992 : d) ssasetalgotopk...() with exactly same K
4993 : Calling these functions will result in reuse of previously found basis.
4994 :
4995 : In any case, only basis is reused. Reconstruction is performed from
4996 : scratch every time you call this function.
4997 :
4998 :
4999 : HANDLING OF DEGENERATE CASES
5000 :
5001 : Following degenerate cases may happen:
5002 : * dataset is empty (no analysis can be done)
5003 : * all sequences are shorter than the window length, no analysis can be done
5004 : * no algorithm is specified (no analysis can be done)
5005 : * last sequence is shorter than the window length (analysis can be done,
5006 : but we can not perform reconstruction on the last sequence)
5007 :
5008 : Calling this function in degenerate cases returns the following result:
5009 : * in any case, WindowWidth ticks are returned
5010 : * trend is assumed to be zero
5011 : * noise is initialized by the last sequence; if last sequence is shorter
5012 : than the window size, it is moved to the end of the array, and the
5013 : beginning of the noise array is filled by zeros
5014 :
5015 : No analysis is performed in degenerate cases (we immediately return dummy
5016 : values, no basis is constructed).
5017 :
5018 : -- ALGLIB --
5019 : Copyright 30.10.2017 by Bochkanov Sergey
5020 : *************************************************************************/
5021 0 : void ssaanalyzelastwindow(const ssamodel &s, real_1d_array &trend, real_1d_array &noise, ae_int_t &nticks, const xparams _xparams)
5022 : {
5023 : jmp_buf _break_jump;
5024 : alglib_impl::ae_state _alglib_env_state;
5025 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5026 0 : if( setjmp(_break_jump) )
5027 : {
5028 : #if !defined(AE_NO_EXCEPTIONS)
5029 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5030 : #else
5031 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5032 : return;
5033 : #endif
5034 : }
5035 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5036 0 : if( _xparams.flags!=0x0 )
5037 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5038 0 : alglib_impl::ssaanalyzelastwindow(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &nticks, &_alglib_env_state);
5039 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5040 0 : return;
5041 : }
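      :
      : /*************************************************************************
      : A sketch of the reduced last-window analysis, for settings where only
      : the newest ticks matter and speed is critical:
      :
      :     real_1d_array trend, noise;
      :     ae_int_t nticks;
      :     ssaanalyzelastwindow(s, trend, noise, nticks);
      :     // nticks equals the window width; trend[i]+noise[i] reproduces
      :     // the corresponding input tick
      : *************************************************************************/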
5042 :
5043 : /*************************************************************************
5044 : This function:
5045 : * builds SSA basis using internally stored (entire) dataset
5046 : * returns reconstruction for the last NTicks of the last sequence
5047 :
5048 : If you want to analyze some other sequence, use ssaanalyzesequence().
5049 :
5050 : Reconstruction phase involves generation of NTicks-WindowWidth sliding
5051 : windows, their decomposition using empirical orthogonal functions found by
5052 : SSA, followed by averaging of each data point across several overlapping
5053 : windows. Thus, every point in the output trend is reconstructed using up
5054 : to WindowWidth overlapping windows (WindowWidth windows exactly in the
5055 : inner points, just one window at the extremal points).
5056 :
5057 : IMPORTANT: due to averaging this function returns different results for
5058 : different values of NTicks. It is expected and not a bug.
5059 :
5060 : For example:
5061 : * Trend[NTicks-1] is always the same because it is not averaged in
5062 : any case (the same applies to Trend[0]).
5063 : * Trend[NTicks-2] has different values for NTicks=WindowWidth
5064 : and NTicks=WindowWidth+1 because the former case means that no
5065 : averaging is performed, and the latter case means that averaging
5066 : using two sliding windows is performed. Larger values of
5067 : NTicks produce the same results as NTicks=WindowWidth+1.
5068 : * ...and so on...
5069 :
5070 : PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
5071 : running time. If you work in time-constrained setting and
5072 : have to analyze just a few last ticks, choosing NTicks equal
5073 : to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth
5074 : will result in good compromise between noise cancellation and
5075 : analysis speed.
5076 :
5077 : INPUT PARAMETERS:
5078 : S - SSA model
5079 : NTicks - number of ticks to analyze, Nticks>=1.
5080 : * special case of NTicks<=WindowWidth is handled
5081 : by analyzing last window and returning NTicks
5082 : last ticks.
5083 : * special case NTicks>LastSequenceLen is handled
5084 : by prepending result with NTicks-LastSequenceLen
5085 : zeros.
5086 :
5087 : OUTPUT PARAMETERS:
5088 : Trend - array[NTicks], reconstructed trend line
5089 : Noise - array[NTicks], the rest of the signal;
5090 : it holds that ActualData = Trend+Noise.
5091 :
5092 :
5093 : CACHING/REUSE OF THE BASIS
5094 :
5095 : Caching/reuse of previous results is performed:
5096 : * first call performs full run of SSA; basis is stored in the cache
5097 : * subsequent calls reuse previously cached basis
5098 : * if you call any function which changes model properties (window length,
5099 : algorithm, dataset), internal basis will be invalidated.
5100 : * the only calls which do NOT invalidate basis are listed below:
5101 : a) ssasetwindow() with same window length
5102 : b) ssaappendpointandupdate()
5103 : c) ssaappendsequenceandupdate()
5104 : d) ssasetalgotopk...() with exactly same K
5105 : Calling these functions will result in reuse of previously found basis.
5106 :
5107 : In any case, only basis is reused. Reconstruction is performed from
5108 : scratch every time you call this function.
5109 :
5110 :
5111 : HANDLING OF DEGENERATE CASES
5112 :
5113 : Following degenerate cases may happen:
5114 : * dataset is empty (no analysis can be done)
5115 : * all sequences are shorter than the window length, no analysis can be done
5116 : * no algorithm is specified (no analysis can be done)
5117 : * last sequence is shorter than the window length (analysis can be done,
5118 : but we can not perform reconstruction on the last sequence)
5119 :
5120 : Calling this function in degenerate cases returns the following result:
5121 : * in any case, NTicks ticks are returned
5122 : * trend is assumed to be zero
5123 : * noise is initialized by the last sequence; if last sequence is shorter
5124 : than the window size, it is moved to the end of the array, and the
5125 : beginning of the noise array is filled by zeros
5126 :
5127 : No analysis is performed in degenerate cases (we immediately return dummy
5128 : values, no basis is constructed).
5129 :
5130 : -- ALGLIB --
5131 : Copyright 30.10.2017 by Bochkanov Sergey
5132 : *************************************************************************/
5133 0 : void ssaanalyzelast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
5134 : {
5135 : jmp_buf _break_jump;
5136 : alglib_impl::ae_state _alglib_env_state;
5137 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5138 0 : if( setjmp(_break_jump) )
5139 : {
5140 : #if !defined(AE_NO_EXCEPTIONS)
5141 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5142 : #else
5143 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5144 : return;
5145 : #endif
5146 : }
5147 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5148 0 : if( _xparams.flags!=0x0 )
5149 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5150 0 : alglib_impl::ssaanalyzelast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);
5151 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5152 0 : return;
5153 : }
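      :
      : /*************************************************************************
      : A sketch of full-scale trend/noise separation for the last 100 ticks of
      : the last sequence (the tick count is illustrative):
      :
      :     real_1d_array trend, noise;
      :     ssaanalyzelast(s, 100, trend, noise);
      :     // trend and noise have 100 elements each, and their sum
      :     // reproduces the analyzed ticks
      : *************************************************************************/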
5154 :
5155 : /*************************************************************************
5156 : This function:
5157 : * builds SSA basis using internally stored (entire) dataset
5158 : * returns reconstruction for the sequence being passed to this function
5159 :
5160 : If you want to analyze the last sequence stored in the model, use
5161 : ssaanalyzelast().
5162 :
5163 : The reconstruction phase involves generation of NTicks-WindowWidth sliding
5164 : windows, their decomposition using empirical orthogonal functions found by
5165 : SSA, followed by averaging of each data point across several overlapping
5166 : windows. Thus, every point in the output trend is reconstructed using up
5167 : to WindowWidth overlapping windows (exactly WindowWidth windows at the
5168 : inner points, just one window at the extremal points).
5169 :
5170 : PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
5171 : running time. If you work in a time-constrained setting and
5172 : have to analyze just a few of the last ticks, choosing NTicks equal
5173 : to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
5174 : will result in a good compromise between noise cancellation and
5175 : analysis speed.
5176 :
5177 : INPUT PARAMETERS:
5178 : S - SSA model
5179 : Data - array[NTicks], can be larger (only NTicks leading
5180 : elements will be used)
5181 : NTicks - number of ticks to analyze, NTicks>=1.
5182 : * special case of NTicks<WindowWidth is handled
5183 : by returning zeros as trend, and signal as noise
5184 :
5185 : OUTPUT PARAMETERS:
5186 : Trend - array[NTicks], reconstructed trend line
5187 : Noise - array[NTicks], the rest of the signal;
5188 : it holds that ActualData = Trend+Noise.
5189 :
5190 :
5191 : CACHING/REUSE OF THE BASIS
5192 :
5193 : Caching/reuse of previous results is performed:
5194 : * first call performs full run of SSA; basis is stored in the cache
5195 : * subsequent calls reuse previously cached basis
5196 : * if you call any function which changes model properties (window length,
5197 : algorithm, dataset), internal basis will be invalidated.
5198 : * the only calls which do NOT invalidate basis are listed below:
5199 : a) ssasetwindow() with same window length
5200 : b) ssaappendpointandupdate()
5201 : c) ssaappendsequenceandupdate()
5202 : d) ssasetalgotopk...() with exactly same K
5203 : Calling these functions will result in reuse of previously found basis.
5204 :
5205 : In any case, only basis is reused. Reconstruction is performed from
5206 : scratch every time you call this function.
5207 :
5208 :
5209 : HANDLING OF DEGENERATE CASES
5210 :
5211 : The following degenerate cases may happen:
5212 : * dataset is empty (no analysis can be done)
5213 : * all sequences are shorter than the window length (no analysis can be done)
5214 : * no algorithm is specified (no analysis can be done)
5215 : * the sequence being passed is shorter than the window length
5216 :
5217 : Calling this function in degenerate cases returns the following result:
5218 : * in any case, NTicks ticks are returned
5219 : * trend is assumed to be zero
5220 : * noise is initialized by the sequence.
5221 :
5222 : No analysis is performed in degenerate cases (we immediately return dummy
5223 : values, no basis is constructed).
5224 :
5225 : -- ALGLIB --
5226 : Copyright 30.10.2017 by Bochkanov Sergey
5227 : *************************************************************************/
5228 0 : void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
5229 : {
5230 : jmp_buf _break_jump;
5231 : alglib_impl::ae_state _alglib_env_state;
5232 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5233 0 : if( setjmp(_break_jump) )
5234 : {
5235 : #if !defined(AE_NO_EXCEPTIONS)
5236 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5237 : #else
5238 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5239 : return;
5240 : #endif
5241 : }
5242 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5243 0 : if( _xparams.flags!=0x0 )
5244 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5245 0 : alglib_impl::ssaanalyzesequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);
5246 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5247 0 : return;
5248 : }
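 :
 : /*************************************************************************
 : Usage sketch for ssaanalyzesequence(): the basis is built from the data
 : stored in the model, while the reconstruction is performed for a separate
 : user-supplied sequence. All values below are assumptions made for the
 : example only.
 :
 :     #include "dataanalysis.h"
 :     using namespace alglib;
 :
 :     int main()
 :     {
 :         real_1d_array x = "[1.0,1.2,0.9,1.1,1.3,1.0,1.2,1.4,1.1,1.3]";
 :         ssamodel s;
 :         ssacreate(s);
 :         ssasetwindow(s, 4);
 :         ssaaddsequence(s, x);
 :         ssasetalgotopkdirect(s, 2);
 :         // reconstruct a different, user-supplied sequence
 :         real_1d_array seq = "[0.9,1.1,1.2,1.0,1.3,1.1]";
 :         real_1d_array trend, noise;
 :         ssaanalyzesequence(s, seq, seq.length(), trend, noise);
 :         // trend[i]+noise[i] == seq[i] for each of the 6 ticks
 :         return 0;
 :     }
 : *************************************************************************/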
5249 :
5250 : /*************************************************************************
5251 : This function:
5252 : * builds SSA basis using internally stored (entire) dataset
5253 : * returns reconstruction for the sequence being passed to this function
5254 :
5255 : If you want to analyze the last sequence stored in the model, use
5256 : ssaanalyzelast().
5257 :
5258 : The reconstruction phase involves generation of NTicks-WindowWidth sliding
5259 : windows, their decomposition using empirical orthogonal functions found by
5260 : SSA, followed by averaging of each data point across several overlapping
5261 : windows. Thus, every point in the output trend is reconstructed using up
5262 : to WindowWidth overlapping windows (exactly WindowWidth windows at the
5263 : inner points, just one window at the extremal points).
5264 :
5265 : PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
5266 : running time. If you work in a time-constrained setting and
5267 : have to analyze just a few of the last ticks, choosing NTicks equal
5268 : to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth,
5269 : will result in a good compromise between noise cancellation and
5270 : analysis speed.
5271 :
5272 : INPUT PARAMETERS:
5273 : S - SSA model
5274 : Data - array[NTicks], can be larger (only NTicks leading
5275 : elements will be used)
5276 : NTicks - number of ticks to analyze, NTicks>=1.
5277 : * special case of NTicks<WindowWidth is handled
5278 : by returning zeros as trend, and signal as noise
5279 :
5280 : OUTPUT PARAMETERS:
5281 : Trend - array[NTicks], reconstructed trend line
5282 : Noise - array[NTicks], the rest of the signal;
5283 : it holds that ActualData = Trend+Noise.
5284 :
5285 :
5286 : CACHING/REUSE OF THE BASIS
5287 :
5288 : Caching/reuse of previous results is performed:
5289 : * first call performs full run of SSA; basis is stored in the cache
5290 : * subsequent calls reuse previously cached basis
5291 : * if you call any function which changes model properties (window length,
5292 : algorithm, dataset), internal basis will be invalidated.
5293 : * the only calls which do NOT invalidate basis are listed below:
5294 : a) ssasetwindow() with same window length
5295 : b) ssaappendpointandupdate()
5296 : c) ssaappendsequenceandupdate()
5297 : d) ssasetalgotopk...() with exactly same K
5298 : Calling these functions will result in reuse of previously found basis.
5299 :
5300 : In any case, only basis is reused. Reconstruction is performed from
5301 : scratch every time you call this function.
5302 :
5303 :
5304 : HANDLING OF DEGENERATE CASES
5305 :
5306 : The following degenerate cases may happen:
5307 : * dataset is empty (no analysis can be done)
5308 : * all sequences are shorter than the window length (no analysis can be done)
5309 : * no algorithm is specified (no analysis can be done)
5310 : * the sequence being passed is shorter than the window length
5311 :
5312 : Calling this function in degenerate cases returns the following result:
5313 : * in any case, NTicks ticks are returned
5314 : * trend is assumed to be zero
5315 : * noise is initialized by the sequence.
5316 :
5317 : No analysis is performed in degenerate cases (we immediately return dummy
5318 : values, no basis is constructed).
5319 :
5320 : -- ALGLIB --
5321 : Copyright 30.10.2017 by Bochkanov Sergey
5322 : *************************************************************************/
5323 : #if !defined(AE_NO_EXCEPTIONS)
5324 0 : void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, real_1d_array &trend, real_1d_array &noise, const xparams _xparams)
5325 : {
5326 : jmp_buf _break_jump;
5327 : alglib_impl::ae_state _alglib_env_state;
5328 : ae_int_t nticks;
5329 :
5330 0 : nticks = data.length();
5331 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5332 0 : if( setjmp(_break_jump) )
5333 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5334 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5335 0 : if( _xparams.flags!=0x0 )
5336 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5337 0 : alglib_impl::ssaanalyzesequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), const_cast<alglib_impl::ae_vector*>(noise.c_ptr()), &_alglib_env_state);
5338 :
5339 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5340 0 : return;
5341 : }
5342 : #endif
5343 :
5344 : /*************************************************************************
5345 : This function builds SSA basis and performs forecasting for a specified
5346 : number of ticks, returning value of trend.
5347 :
5348 : Forecast is performed as follows:
5349 : * SSA trend extraction is applied to last WindowWidth elements of the
5350 : internally stored dataset; this step is basically a noise reduction.
5351 : * linear recurrence relation is applied to extracted trend
5352 :
5353 : This function has following running time:
5354 : * O(NBasis*WindowWidth) for trend extraction phase (always performed)
5355 : * O(WindowWidth*NTicks) for forecast phase
5356 :
5357 : NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
5358 : apply recurrence relation to raw unprocessed data, use another
5359 : function - ssaforecastsequence() - which allows you to turn the
5360 : noise reduction phase on and off.
5361 :
5362 : NOTE: this algorithm performs prediction using only one (the last)
5363 : sliding window. Predictions produced by such an approach are smooth
5364 : continuations of the reconstructed trend line, but they can be
5365 : easily corrupted by noise. If you need noise-resistant prediction,
5366 : use ssaforecastavglast() function, which averages predictions built
5367 : using several sliding windows.
5368 :
5369 : INPUT PARAMETERS:
5370 : S - SSA model
5371 : NTicks - number of ticks to forecast, NTicks>=1
5372 :
5373 : OUTPUT PARAMETERS:
5374 : Trend - array[NTicks], predicted trend line
5375 :
5376 :
5377 : CACHING/REUSE OF THE BASIS
5378 :
5379 : Caching/reuse of previous results is performed:
5380 : * first call performs full run of SSA; basis is stored in the cache
5381 : * subsequent calls reuse previously cached basis
5382 : * if you call any function which changes model properties (window length,
5383 : algorithm, dataset), internal basis will be invalidated.
5384 : * the only calls which do NOT invalidate basis are listed below:
5385 : a) ssasetwindow() with same window length
5386 : b) ssaappendpointandupdate()
5387 : c) ssaappendsequenceandupdate()
5388 : d) ssasetalgotopk...() with exactly same K
5389 : Calling these functions will result in reuse of previously found basis.
5390 :
5391 :
5392 : HANDLING OF DEGENERATE CASES
5393 :
5394 : The following degenerate cases may happen:
5395 : * dataset is empty (no analysis can be done)
5396 : * all sequences are shorter than the window length (no analysis can be done)
5397 : * no algorithm is specified (no analysis can be done)
5398 : * last sequence is shorter than WindowWidth (analysis can be done,
5399 : but we cannot perform forecasting on the last sequence)
5400 : * window length is 1 (impossible to use for forecasting)
5401 : * SSA analysis algorithm is configured to extract a basis whose size is
5402 : equal to the window length (impossible to use for forecasting; only a
5403 : basis whose size is less than the window length can be used).
5404 :
5405 : Calling this function in degenerate cases returns the following result:
5406 : * NTicks copies of the last value are returned for a non-empty task with
5407 : a large enough dataset, but with an overcomplete basis (window width=1
5408 : or basis size equal to the window width)
5409 : * a zero trend with length=NTicks is returned for an empty task
5410 :
5411 : No analysis is performed in degenerate cases (we immediately return dummy
5412 : values, no basis is ever constructed).
5413 :
5414 : -- ALGLIB --
5415 : Copyright 30.10.2017 by Bochkanov Sergey
5416 : *************************************************************************/
5417 0 : void ssaforecastlast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams)
5418 : {
5419 : jmp_buf _break_jump;
5420 : alglib_impl::ae_state _alglib_env_state;
5421 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5422 0 : if( setjmp(_break_jump) )
5423 : {
5424 : #if !defined(AE_NO_EXCEPTIONS)
5425 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5426 : #else
5427 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5428 : return;
5429 : #endif
5430 : }
5431 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5432 0 : if( _xparams.flags!=0x0 )
5433 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5434 0 : alglib_impl::ssaforecastlast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5435 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5436 0 : return;
5437 : }
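 :
 : /*************************************************************************
 : Usage sketch for ssaforecastlast(): forecast several ticks past the end
 : of the internally stored dataset. The dataset and parameters are assumed
 : values chosen for the example; note that the basis size (2) is less than
 : the window width (4), as forecasting requires.
 :
 :     #include "dataanalysis.h"
 :     using namespace alglib;
 :
 :     int main()
 :     {
 :         real_1d_array x = "[1.0,1.2,0.9,1.1,1.3,1.0,1.2,1.4,1.1,1.3]";
 :         ssamodel s;
 :         ssacreate(s);
 :         ssasetwindow(s, 4);
 :         ssaaddsequence(s, x);
 :         ssasetalgotopkdirect(s, 2);
 :         real_1d_array trend;
 :         ssaforecastlast(s, 5, trend); // predicted trend, 5 ticks ahead
 :         return 0;
 :     }
 : *************************************************************************/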
5438 :
5439 : /*************************************************************************
5440 : This function builds SSA basis and performs forecasting for a user-
5441 : specified sequence, returning value of trend.
5442 :
5443 : Forecasting is done in two stages:
5444 : * first, we extract the trend from the last WindowWidth elements of the
5445 : sequence. This stage is optional; you can turn it off if you pass
5446 : data which are already processed with SSA. Of course, you can turn it
5447 : off even for raw data, but it is not recommended - noise suppression is
5448 : very important for correct prediction.
5449 : * then, we apply the LRR to the last WindowWidth-1 elements of the
5450 : extracted trend.
5451 :
5452 : This function has following running time:
5453 : * O(NBasis*WindowWidth) for trend extraction phase
5454 : * O(WindowWidth*NTicks) for forecast phase
5455 :
5456 : NOTE: this algorithm performs prediction using only one (the last)
5457 : sliding window. Predictions produced by such an approach are smooth
5458 : continuations of the reconstructed trend line, but they can be
5459 : easily corrupted by noise. If you need noise-resistant prediction,
5460 : use ssaforecastavgsequence() function, which averages predictions
5461 : built using several sliding windows.
5462 :
5463 : INPUT PARAMETERS:
5464 : S - SSA model
5465 : Data - array[NTicks], data to forecast
5466 : DataLen - number of ticks in the data, DataLen>=1
5467 : ForecastLen - number of ticks to predict, ForecastLen>=1
5468 : ApplySmoothing - whether to apply smoothing trend extraction or not;
5469 : if you do not know what to specify, pass True.
5470 :
5471 : OUTPUT PARAMETERS:
5472 : Trend - array[ForecastLen], forecasted trend
5473 :
5474 :
5475 : CACHING/REUSE OF THE BASIS
5476 :
5477 : Caching/reuse of previous results is performed:
5478 : * first call performs full run of SSA; basis is stored in the cache
5479 : * subsequent calls reuse previously cached basis
5480 : * if you call any function which changes model properties (window length,
5481 : algorithm, dataset), internal basis will be invalidated.
5482 : * the only calls which do NOT invalidate basis are listed below:
5483 : a) ssasetwindow() with same window length
5484 : b) ssaappendpointandupdate()
5485 : c) ssaappendsequenceandupdate()
5486 : d) ssasetalgotopk...() with exactly same K
5487 : Calling these functions will result in reuse of previously found basis.
5488 :
5489 :
5490 : HANDLING OF DEGENERATE CASES
5491 :
5492 : The following degenerate cases may happen:
5493 : * dataset is empty (no analysis can be done)
5494 : * all sequences are shorter than the window length (no analysis can be done)
5495 : * no algorithm is specified (no analysis can be done)
5496 : * data sequence is shorter than WindowWidth (analysis can be done,
5497 : but we cannot perform forecasting on the last sequence)
5498 : * window length is 1 (impossible to use for forecasting)
5499 : * SSA analysis algorithm is configured to extract a basis whose size is
5500 : equal to the window length (impossible to use for forecasting; only a
5501 : basis whose size is less than the window length can be used).
5502 :
5503 : Calling this function in degenerate cases returns the following result:
5504 : * ForecastLen copies of the last value are returned for a non-empty task
5505 : with a large enough dataset, but with an overcomplete basis (window
5506 : width=1 or basis size equal to the window width)
5507 : * a zero trend with length=ForecastLen is returned for an empty task
5508 :
5509 : No analysis is performed in degenerate cases (we immediately return dummy
5510 : values, no basis is ever constructed).
5511 :
5512 : -- ALGLIB --
5513 : Copyright 30.10.2017 by Bochkanov Sergey
5514 : *************************************************************************/
5515 0 : void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams)
5516 : {
5517 : jmp_buf _break_jump;
5518 : alglib_impl::ae_state _alglib_env_state;
5519 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5520 0 : if( setjmp(_break_jump) )
5521 : {
5522 : #if !defined(AE_NO_EXCEPTIONS)
5523 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5524 : #else
5525 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5526 : return;
5527 : #endif
5528 : }
5529 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5530 0 : if( _xparams.flags!=0x0 )
5531 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5532 0 : alglib_impl::ssaforecastsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5533 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5534 0 : return;
5535 : }
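 :
 : /*************************************************************************
 : Usage sketch for ssaforecastsequence(), assuming a model s prepared as in
 : the ssaforecastlast() sketch above (window width 4, top-2 basis). The
 : sequence values are illustrative assumptions.
 :
 :     real_1d_array seq = "[0.9,1.1,1.2,1.0,1.3,1.1]";
 :     real_1d_array trend;
 :     // smooth the last window first (recommended), then forecast 5 ticks
 :     ssaforecastsequence(s, seq, seq.length(), 5, true, trend);
 : *************************************************************************/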
5536 :
5537 : /*************************************************************************
5538 : This function builds SSA basis and performs forecasting for a user-
5539 : specified sequence, returning value of trend.
5540 :
5541 : Forecasting is done in two stages:
5542 : * first, we extract the trend from the last WindowWidth elements of the
5543 : sequence. This stage is optional; you can turn it off if you pass
5544 : data which are already processed with SSA. Of course, you can turn it
5545 : off even for raw data, but it is not recommended - noise suppression is
5546 : very important for correct prediction.
5547 : * then, we apply the LRR to the last WindowWidth-1 elements of the
5548 : extracted trend.
5549 :
5550 : This function has following running time:
5551 : * O(NBasis*WindowWidth) for trend extraction phase
5552 : * O(WindowWidth*NTicks) for forecast phase
5553 :
5554 : NOTE: this algorithm performs prediction using only one (the last)
5555 : sliding window. Predictions produced by such an approach are smooth
5556 : continuations of the reconstructed trend line, but they can be
5557 : easily corrupted by noise. If you need noise-resistant prediction,
5558 : use ssaforecastavgsequence() function, which averages predictions
5559 : built using several sliding windows.
5560 :
5561 : INPUT PARAMETERS:
5562 : S - SSA model
5563 : Data - array[NTicks], data to forecast
5564 : DataLen - number of ticks in the data, DataLen>=1
5565 : ForecastLen - number of ticks to predict, ForecastLen>=1
5566 : ApplySmoothing - whether to apply smoothing trend extraction or not;
5567 : if you do not know what to specify, pass True.
5568 :
5569 : OUTPUT PARAMETERS:
5570 : Trend - array[ForecastLen], forecasted trend
5571 :
5572 :
5573 : CACHING/REUSE OF THE BASIS
5574 :
5575 : Caching/reuse of previous results is performed:
5576 : * first call performs full run of SSA; basis is stored in the cache
5577 : * subsequent calls reuse previously cached basis
5578 : * if you call any function which changes model properties (window length,
5579 : algorithm, dataset), internal basis will be invalidated.
5580 : * the only calls which do NOT invalidate basis are listed below:
5581 : a) ssasetwindow() with same window length
5582 : b) ssaappendpointandupdate()
5583 : c) ssaappendsequenceandupdate()
5584 : d) ssasetalgotopk...() with exactly same K
5585 : Calling these functions will result in reuse of previously found basis.
5586 :
5587 :
5588 : HANDLING OF DEGENERATE CASES
5589 :
5590 : The following degenerate cases may happen:
5591 : * dataset is empty (no analysis can be done)
5592 : * all sequences are shorter than the window length (no analysis can be done)
5593 : * no algorithm is specified (no analysis can be done)
5594 : * data sequence is shorter than WindowWidth (analysis can be done,
5595 : but we cannot perform forecasting on the last sequence)
5596 : * window length is 1 (impossible to use for forecasting)
5597 : * SSA analysis algorithm is configured to extract a basis whose size is
5598 : equal to the window length (impossible to use for forecasting; only a
5599 : basis whose size is less than the window length can be used).
5600 :
5601 : Calling this function in degenerate cases returns the following result:
5602 : * ForecastLen copies of the last value are returned for a non-empty task
5603 : with a large enough dataset, but with an overcomplete basis (window
5604 : width=1 or basis size equal to the window width)
5605 : * a zero trend with length=ForecastLen is returned for an empty task
5606 :
5607 : No analysis is performed in degenerate cases (we immediately return dummy
5608 : values, no basis is ever constructed).
5609 :
5610 : -- ALGLIB --
5611 : Copyright 30.10.2017 by Bochkanov Sergey
5612 : *************************************************************************/
5613 : #if !defined(AE_NO_EXCEPTIONS)
5614 0 : void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams)
5615 : {
5616 : jmp_buf _break_jump;
5617 : alglib_impl::ae_state _alglib_env_state;
5618 : ae_int_t datalen;
5619 : bool applysmoothing;
5620 :
5621 0 : datalen = data.length();
5622 0 : applysmoothing = true;
5623 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5624 0 : if( setjmp(_break_jump) )
5625 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5626 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5627 0 : if( _xparams.flags!=0x0 )
5628 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5629 0 : alglib_impl::ssaforecastsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5630 :
5631 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5632 0 : return;
5633 : }
5634 : #endif
5635 :
5636 : /*************************************************************************
5637 : This function builds SSA basis and performs forecasting for a specified
5638 : number of ticks, returning value of trend.
5639 :
5640 : Forecast is performed as follows:
5641 : * SSA trend extraction is applied to last M sliding windows of the
5642 : internally stored dataset
5643 : * for each of M sliding windows, M predictions are built
5644 : * average value of M predictions is returned
5645 :
5646 : This function has following running time:
5647 : * O(NBasis*WindowWidth*M) for trend extraction phase (always performed)
5648 : * O(WindowWidth*NTicks*M) for forecast phase
5649 :
5650 : NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
5651 : apply recurrence relation to raw unprocessed data, use another
5652 : function - ssaforecastsequence() - which allows you to turn the
5653 : noise reduction phase on and off.
5654 :
5655 : NOTE: combining several predictions results in lower sensitivity to
5656 : noise, but it may produce undesirable discontinuities between the last
5657 : point of the trend and the first point of the prediction. The reason is
5658 : that the last point of the trend is usually corrupted by noise, while
5659 : the average of several predictions is less sensitive to noise;
5660 : thus a discontinuity appears. It is not a bug.
5661 :
5662 : INPUT PARAMETERS:
5663 : S - SSA model
5664 : M - number of sliding windows to combine, M>=1. If
5665 : your dataset has less than M sliding windows, this
5666 : parameter will be silently reduced.
5667 : NTicks - number of ticks to forecast, NTicks>=1
5668 :
5669 : OUTPUT PARAMETERS:
5670 : Trend - array[NTicks], predicted trend line
5671 :
5672 :
5673 : CACHING/REUSE OF THE BASIS
5674 :
5675 : Caching/reuse of previous results is performed:
5676 : * first call performs full run of SSA; basis is stored in the cache
5677 : * subsequent calls reuse previously cached basis
5678 : * if you call any function which changes model properties (window length,
5679 : algorithm, dataset), internal basis will be invalidated.
5680 : * the only calls which do NOT invalidate basis are listed below:
5681 : a) ssasetwindow() with same window length
5682 : b) ssaappendpointandupdate()
5683 : c) ssaappendsequenceandupdate()
5684 : d) ssasetalgotopk...() with exactly same K
5685 : Calling these functions will result in reuse of previously found basis.
5686 :
5687 :
5688 : HANDLING OF DEGENERATE CASES
5689 :
5690 : The following degenerate cases may happen:
5691 : * dataset is empty (no analysis can be done)
5692 : * all sequences are shorter than the window length (no analysis can be done)
5693 : * no algorithm is specified (no analysis can be done)
5694 : * last sequence is shorter than WindowWidth (analysis can be done,
5695 : but we cannot perform forecasting on the last sequence)
5696 : * window length is 1 (impossible to use for forecasting)
5697 : * SSA analysis algorithm is configured to extract a basis whose size is
5698 : equal to the window length (impossible to use for forecasting; only a
5699 : basis whose size is less than the window length can be used).
5700 :
5701 : Calling this function in degenerate cases returns the following result:
5702 : * NTicks copies of the last value are returned for a non-empty task with
5703 : a large enough dataset, but with an overcomplete basis (window width=1
5704 : or basis size equal to the window width)
5705 : * a zero trend with length=NTicks is returned for an empty task
5706 :
5707 : No analysis is performed in degenerate cases (we immediately return dummy
5708 : values, no basis is ever constructed).
5709 :
5710 : -- ALGLIB --
5711 : Copyright 30.10.2017 by Bochkanov Sergey
5712 : *************************************************************************/
5713 0 : void ssaforecastavglast(const ssamodel &s, const ae_int_t m, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams)
5714 : {
5715 : jmp_buf _break_jump;
5716 : alglib_impl::ae_state _alglib_env_state;
5717 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5718 0 : if( setjmp(_break_jump) )
5719 : {
5720 : #if !defined(AE_NO_EXCEPTIONS)
5721 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5722 : #else
5723 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5724 : return;
5725 : #endif
5726 : }
5727 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5728 0 : if( _xparams.flags!=0x0 )
5729 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5730 0 : alglib_impl::ssaforecastavglast(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), m, nticks, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5731 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5732 0 : return;
5733 : }
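 :
 : /*************************************************************************
 : Usage sketch for ssaforecastavglast(), assuming a model s prepared as in
 : the ssaforecastlast() sketch above. Averaging over M=3 sliding windows
 : trades a possible discontinuity at the junction for noise resistance.
 :
 :     real_1d_array trend;
 :     // average the forecasts produced from the last 3 sliding windows
 :     ssaforecastavglast(s, 3, 5, trend); // M=3, 5 ticks ahead
 : *************************************************************************/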
5734 :
5735 : /*************************************************************************
5736 : This function builds SSA basis and performs forecasting for a user-
5737 : specified sequence, returning value of trend.
5738 :
5739 : Forecasting is done in two stages:
5740 : * first, we extract the trend from the last M sliding windows of the
5741 : sequence. This stage is optional; you can turn it off if you pass data which
5742 : are already processed with SSA. Of course, you can turn it off even
5743 : for raw data, but it is not recommended - noise suppression is very
5744 : important for correct prediction.
5745 : * then, we apply the LRR independently to each of the M sliding windows
5746 : * the average of the M predictions is returned
5747 :
5748 : This function has following running time:
5749 : * O(NBasis*WindowWidth*M) for trend extraction phase
5750 : * O(WindowWidth*NTicks*M) for forecast phase
5751 :
5752 : NOTE: combining several predictions results in lower sensitivity to
5753 : noise, but it may produce undesirable discontinuities between the last
5754 : point of the trend and the first point of the prediction. The reason is
5755 : that the last point of the trend is usually corrupted by noise, while
5756 : the average of several predictions is less sensitive to noise;
5757 : thus a discontinuity appears. It is not a bug.
5758 :
5759 : INPUT PARAMETERS:
5760 : S - SSA model
5761 : Data - array[NTicks], data to forecast
5762 : DataLen - number of ticks in the data, DataLen>=1
5763 : M - number of sliding windows to combine, M>=1. If
5764 : your dataset has less than M sliding windows, this
5765 : parameter will be silently reduced.
5766 : ForecastLen - number of ticks to predict, ForecastLen>=1
5767 : ApplySmoothing - whether to apply smoothing trend extraction or not;
5768 : if you do not know what to specify, pass True.
5769 :
5770 : OUTPUT PARAMETERS:
5771 : Trend - array[ForecastLen], forecasted trend
5772 :
5773 :
5774 : CACHING/REUSE OF THE BASIS
5775 :
5776 : Caching/reuse of previous results is performed:
5777 : * first call performs full run of SSA; basis is stored in the cache
5778 : * subsequent calls reuse previously cached basis
5779 : * if you call any function which changes model properties (window length,
5780 : algorithm, dataset), internal basis will be invalidated.
5781 : * the only calls which do NOT invalidate basis are listed below:
5782 : a) ssasetwindow() with same window length
5783 : b) ssaappendpointandupdate()
5784 : c) ssaappendsequenceandupdate()
5785 : d) ssasetalgotopk...() with exactly same K
5786 : Calling these functions will result in reuse of previously found basis.
5787 :
5788 :
5789 : HANDLING OF DEGENERATE CASES
5790 :
5791 : The following degenerate cases may happen:
5792 : * dataset is empty (no analysis can be done)
5793 : * all sequences are shorter than the window length (no analysis can be done)
5794 : * no algorithm is specified (no analysis can be done)
5795 : * data sequence is shorter than WindowWidth (analysis can be done,
5796 : but we cannot perform forecasting on the last sequence)
5797 : * window length is 1 (impossible to use for forecasting)
5798 : * SSA analysis algorithm is configured to extract a basis whose size is
5799 : equal to the window length (impossible to use for forecasting; only a
5800 : basis whose size is less than the window length can be used).
5801 :
5802 : Calling this function in degenerate cases returns the following result:
5803 : * ForecastLen copies of the last value are returned for a non-empty task
5804 : with a large enough dataset, but with an overcomplete basis (window
5805 : width=1 or basis size equal to the window width)
5806 : * a zero trend with length=ForecastLen is returned for an empty task
5807 :
5808 : No analysis is performed in degenerate cases (we immediately return dummy
5809 : values, no basis is ever constructed).
5810 :
5811 : -- ALGLIB --
5812 : Copyright 30.10.2017 by Bochkanov Sergey
5813 : *************************************************************************/
5814 0 : void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t m, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams)
5815 : {
5816 : jmp_buf _break_jump;
5817 : alglib_impl::ae_state _alglib_env_state;
5818 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5819 0 : if( setjmp(_break_jump) )
5820 : {
5821 : #if !defined(AE_NO_EXCEPTIONS)
5822 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5823 : #else
5824 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
5825 : return;
5826 : #endif
5827 : }
5828 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5829 0 : if( _xparams.flags!=0x0 )
5830 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5831 0 : alglib_impl::ssaforecastavgsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, m, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5832 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5833 0 : return;
5834 : }
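 :
 : /*************************************************************************
 : Usage sketch for ssaforecastavgsequence(), assuming a model s prepared as
 : in the ssaforecastlast() sketch above. A 6-tick sequence with a 4-tick
 : window yields exactly 3 sliding windows, so M=3 fits; all values are
 : assumptions made for the example.
 :
 :     real_1d_array seq = "[0.9,1.1,1.2,1.0,1.3,1.1]";
 :     real_1d_array trend;
 :     // M=3 windows, 5 ticks ahead, smoothing turned on
 :     ssaforecastavgsequence(s, seq, seq.length(), 3, 5, true, trend);
 : *************************************************************************/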
5835 :
5836 : /*************************************************************************
5837 : This function builds SSA basis and performs forecasting for a user-
5838 : specified sequence, returning value of trend.
5839 :
5840 : Forecasting is done in two stages:
5841 : * first, we extract the trend from the last M sliding windows of the
5842 : sequence. This stage is optional; you can turn it off if you pass data which
5843 : are already processed with SSA. Of course, you can turn it off even
5844 : for raw data, but it is not recommended - noise suppression is very
5845 : important for correct prediction.
5846 : * then, we apply the LRR independently to each of the M sliding windows
5847 : * the average of the M predictions is returned
5848 :
5849 : This function has following running time:
5850 : * O(NBasis*WindowWidth*M) for trend extraction phase
5851 : * O(WindowWidth*NTicks*M) for forecast phase
5852 :
5853 : NOTE: combining several predictions results in lower sensitivity to
5854 : noise, but it may produce undesirable discontinuities between the last
5855 : point of the trend and the first point of the prediction. The reason is
5856 : that the last point of the trend is usually corrupted by noise, while
5857 : the average of several predictions is less sensitive to noise;
5858 : thus a discontinuity appears. It is not a bug.
5859 :
5860 : INPUT PARAMETERS:
5861 : S - SSA model
5862 : Data - array[NTicks], data to forecast
5863 : DataLen - number of ticks in the data, DataLen>=1
5864 : M - number of sliding windows to combine, M>=1. If
5865 : your dataset has less than M sliding windows, this
5866 : parameter will be silently reduced.
5867 : ForecastLen - number of ticks to predict, ForecastLen>=1
5868 : ApplySmoothing - whether to apply smoothing trend extraction or not;
5869 : if you do not know what to specify, pass True.
5870 :
5871 : OUTPUT PARAMETERS:
5872 : Trend - array[ForecastLen], forecasted trend
5873 :
5874 :
5875 : CACHING/REUSE OF THE BASIS
5876 :
5877 : Caching/reuse of previous results is performed:
5878 : * first call performs full run of SSA; basis is stored in the cache
5879 : * subsequent calls reuse previously cached basis
5880 : * if you call any function which changes model properties (window length,
5881 : algorithm, dataset), internal basis will be invalidated.
5882 : * the only calls which do NOT invalidate basis are listed below:
5883 : a) ssasetwindow() with same window length
5884 : b) ssaappendpointandupdate()
5885 : c) ssaappendsequenceandupdate()
5886 : d) ssasetalgotopk...() with exactly same K
5887 : Calling these functions will result in reuse of previously found basis.
5888 :
5889 :
5890 : HANDLING OF DEGENERATE CASES
5891 :
5892 : The following degenerate cases may happen:
5893 : * dataset is empty (no analysis can be done)
5894 : * all sequences are shorter than the window length (no analysis can be done)
5895 : * no algorithm is specified (no analysis can be done)
5896 : * data sequence is shorter than WindowWidth (analysis can be done,
5897 : but we cannot perform forecasting on the last sequence)
5898 : * window length is 1 (impossible to use for forecasting)
5899 : * SSA analysis algorithm is configured to extract a basis whose size is
5900 : equal to the window length (impossible to use for forecasting; only a
5901 : basis whose size is less than the window length can be used).
5902 :
5903 : Calling this function in degenerate cases returns the following result:
5904 : * ForecastLen copies of the last value are returned for a non-empty task
5905 : with a large enough dataset, but with an overcomplete basis (window
5906 : width=1 or basis size equal to the window width)
5907 : * a zero trend with length=ForecastLen is returned for an empty task
5908 :
5909 : No analysis is performed in degenerate cases (we immediately return dummy
5910 : values, no basis is ever constructed).
5911 :
5912 : -- ALGLIB --
5913 : Copyright 30.10.2017 by Bochkanov Sergey
5914 : *************************************************************************/
5915 : #if !defined(AE_NO_EXCEPTIONS)
5916 0 : void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t m, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams)
5917 : {
5918 : jmp_buf _break_jump;
5919 : alglib_impl::ae_state _alglib_env_state;
5920 : ae_int_t datalen;
5921 : bool applysmoothing;
5922 :
5923 0 : datalen = data.length();
5924 0 : applysmoothing = true;
5925 0 : alglib_impl::ae_state_init(&_alglib_env_state);
5926 0 : if( setjmp(_break_jump) )
5927 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
5928 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
5929 0 : if( _xparams.flags!=0x0 )
5930 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
5931 0 : alglib_impl::ssaforecastavgsequence(const_cast<alglib_impl::ssamodel*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(data.c_ptr()), datalen, m, forecastlen, applysmoothing, const_cast<alglib_impl::ae_vector*>(trend.c_ptr()), &_alglib_env_state);
5932 :
5933 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
5934 0 : return;
5935 : }
5936 : #endif
5937 : #endif
5938 :
5939 : #if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
5940 : /*************************************************************************
5941 :
5942 : *************************************************************************/
5943 0 : _linearmodel_owner::_linearmodel_owner()
5944 : {
5945 : jmp_buf _break_jump;
5946 : alglib_impl::ae_state _state;
5947 :
5948 0 : alglib_impl::ae_state_init(&_state);
5949 0 : if( setjmp(_break_jump) )
5950 : {
5951 0 : if( p_struct!=NULL )
5952 : {
5953 0 : alglib_impl::_linearmodel_destroy(p_struct);
5954 0 : alglib_impl::ae_free(p_struct);
5955 : }
5956 0 : p_struct = NULL;
5957 : #if !defined(AE_NO_EXCEPTIONS)
5958 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
5959 : #else
5960 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
5961 : return;
5962 : #endif
5963 : }
5964 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
5965 0 : p_struct = NULL;
5966 0 : p_struct = (alglib_impl::linearmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::linearmodel), &_state);
5967 0 : memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
5968 0 : alglib_impl::_linearmodel_init(p_struct, &_state, ae_false);
5969 0 : ae_state_clear(&_state);
5970 0 : }
5971 :
5972 0 : _linearmodel_owner::_linearmodel_owner(const _linearmodel_owner &rhs)
5973 : {
5974 : jmp_buf _break_jump;
5975 : alglib_impl::ae_state _state;
5976 :
5977 0 : alglib_impl::ae_state_init(&_state);
5978 0 : if( setjmp(_break_jump) )
5979 : {
5980 0 : if( p_struct!=NULL )
5981 : {
5982 0 : alglib_impl::_linearmodel_destroy(p_struct);
5983 0 : alglib_impl::ae_free(p_struct);
5984 : }
5985 0 : p_struct = NULL;
5986 : #if !defined(AE_NO_EXCEPTIONS)
5987 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
5988 : #else
5989 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
5990 : return;
5991 : #endif
5992 : }
5993 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
5994 0 : p_struct = NULL;
5995 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: linearmodel copy constructor failure (source is not initialized)", &_state);
5996 0 : p_struct = (alglib_impl::linearmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::linearmodel), &_state);
5997 0 : memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
5998 0 : alglib_impl::_linearmodel_init_copy(p_struct, const_cast<alglib_impl::linearmodel*>(rhs.p_struct), &_state, ae_false);
5999 0 : ae_state_clear(&_state);
6000 0 : }
6001 :
6002 0 : _linearmodel_owner& _linearmodel_owner::operator=(const _linearmodel_owner &rhs)
6003 : {
6004 0 : if( this==&rhs )
6005 0 : return *this;
6006 : jmp_buf _break_jump;
6007 : alglib_impl::ae_state _state;
6008 :
6009 0 : alglib_impl::ae_state_init(&_state);
6010 0 : if( setjmp(_break_jump) )
6011 : {
6012 : #if !defined(AE_NO_EXCEPTIONS)
6013 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6014 : #else
6015 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6016 : return *this;
6017 : #endif
6018 : }
6019 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6020 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: linearmodel assignment constructor failure (destination is not initialized)", &_state);
6021 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: linearmodel assignment constructor failure (source is not initialized)", &_state);
6022 0 : alglib_impl::_linearmodel_destroy(p_struct);
6023 0 : memset(p_struct, 0, sizeof(alglib_impl::linearmodel));
6024 0 : alglib_impl::_linearmodel_init_copy(p_struct, const_cast<alglib_impl::linearmodel*>(rhs.p_struct), &_state, ae_false);
6025 0 : ae_state_clear(&_state);
6026 0 : return *this;
6027 : }
6028 :
6029 0 : _linearmodel_owner::~_linearmodel_owner()
6030 : {
6031 0 : if( p_struct!=NULL )
6032 : {
6033 0 : alglib_impl::_linearmodel_destroy(p_struct);
6034 0 : ae_free(p_struct);
6035 : }
6036 0 : }
6037 :
6038 0 : alglib_impl::linearmodel* _linearmodel_owner::c_ptr()
6039 : {
6040 0 : return p_struct;
6041 : }
6042 :
6043 0 : alglib_impl::linearmodel* _linearmodel_owner::c_ptr() const
6044 : {
6045 0 : return const_cast<alglib_impl::linearmodel*>(p_struct);
6046 : }
6047 0 : linearmodel::linearmodel() : _linearmodel_owner()
6048 : {
6049 0 : }
6050 :
6051 0 : linearmodel::linearmodel(const linearmodel &rhs):_linearmodel_owner(rhs)
6052 : {
6053 0 : }
6054 :
6055 0 : linearmodel& linearmodel::operator=(const linearmodel &rhs)
6056 : {
6057 0 : if( this==&rhs )
6058 0 : return *this;
6059 0 : _linearmodel_owner::operator=(rhs);
6060 0 : return *this;
6061 : }
6062 :
6063 0 : linearmodel::~linearmodel()
6064 : {
6065 0 : }
6066 :
6067 :
6068 : /*************************************************************************
6069 : The LRReport structure contains additional information about the linear model:
6070 : * C - covariance matrix, array[0..NVars,0..NVars].
6071 : C[i,j] = Cov(A[i],A[j])
6072 : * RMSError - root mean square error on a training set
6073 : * AvgError - average error on a training set
6074 : * AvgRelError - average relative error on a training set (excluding
6075 : observations with zero function value).
6076 : * CVRMSError - leave-one-out cross-validation estimate of
6077 : generalization error. Calculated using a fast algorithm
6078 : with O(NVars*NPoints) complexity.
6079 : * CVAvgError - cross-validation estimate of average error
6080 : * CVAvgRelError - cross-validation estimate of average relative error
6081 :
6082 : All other fields of the structure are intended for internal use and should
6083 : not be used outside ALGLIB.
6084 : *************************************************************************/
6085 0 : _lrreport_owner::_lrreport_owner()
6086 : {
6087 : jmp_buf _break_jump;
6088 : alglib_impl::ae_state _state;
6089 :
6090 0 : alglib_impl::ae_state_init(&_state);
6091 0 : if( setjmp(_break_jump) )
6092 : {
6093 0 : if( p_struct!=NULL )
6094 : {
6095 0 : alglib_impl::_lrreport_destroy(p_struct);
6096 0 : alglib_impl::ae_free(p_struct);
6097 : }
6098 0 : p_struct = NULL;
6099 : #if !defined(AE_NO_EXCEPTIONS)
6100 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6101 : #else
6102 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6103 : return;
6104 : #endif
6105 : }
6106 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6107 0 : p_struct = NULL;
6108 0 : p_struct = (alglib_impl::lrreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::lrreport), &_state);
6109 0 : memset(p_struct, 0, sizeof(alglib_impl::lrreport));
6110 0 : alglib_impl::_lrreport_init(p_struct, &_state, ae_false);
6111 0 : ae_state_clear(&_state);
6112 0 : }
6113 :
6114 0 : _lrreport_owner::_lrreport_owner(const _lrreport_owner &rhs)
6115 : {
6116 : jmp_buf _break_jump;
6117 : alglib_impl::ae_state _state;
6118 :
6119 0 : alglib_impl::ae_state_init(&_state);
6120 0 : if( setjmp(_break_jump) )
6121 : {
6122 0 : if( p_struct!=NULL )
6123 : {
6124 0 : alglib_impl::_lrreport_destroy(p_struct);
6125 0 : alglib_impl::ae_free(p_struct);
6126 : }
6127 0 : p_struct = NULL;
6128 : #if !defined(AE_NO_EXCEPTIONS)
6129 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6130 : #else
6131 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6132 : return;
6133 : #endif
6134 : }
6135 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6136 0 : p_struct = NULL;
6137 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: lrreport copy constructor failure (source is not initialized)", &_state);
6138 0 : p_struct = (alglib_impl::lrreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::lrreport), &_state);
6139 0 : memset(p_struct, 0, sizeof(alglib_impl::lrreport));
6140 0 : alglib_impl::_lrreport_init_copy(p_struct, const_cast<alglib_impl::lrreport*>(rhs.p_struct), &_state, ae_false);
6141 0 : ae_state_clear(&_state);
6142 0 : }
6143 :
6144 0 : _lrreport_owner& _lrreport_owner::operator=(const _lrreport_owner &rhs)
6145 : {
6146 0 : if( this==&rhs )
6147 0 : return *this;
6148 : jmp_buf _break_jump;
6149 : alglib_impl::ae_state _state;
6150 :
6151 0 : alglib_impl::ae_state_init(&_state);
6152 0 : if( setjmp(_break_jump) )
6153 : {
6154 : #if !defined(AE_NO_EXCEPTIONS)
6155 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6156 : #else
6157 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6158 : return *this;
6159 : #endif
6160 : }
6161 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6162 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: lrreport assignment constructor failure (destination is not initialized)", &_state);
6163 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: lrreport assignment constructor failure (source is not initialized)", &_state);
6164 0 : alglib_impl::_lrreport_destroy(p_struct);
6165 0 : memset(p_struct, 0, sizeof(alglib_impl::lrreport));
6166 0 : alglib_impl::_lrreport_init_copy(p_struct, const_cast<alglib_impl::lrreport*>(rhs.p_struct), &_state, ae_false);
6167 0 : ae_state_clear(&_state);
6168 0 : return *this;
6169 : }
6170 :
6171 0 : _lrreport_owner::~_lrreport_owner()
6172 : {
6173 0 : if( p_struct!=NULL )
6174 : {
6175 0 : alglib_impl::_lrreport_destroy(p_struct);
6176 0 : ae_free(p_struct);
6177 : }
6178 0 : }
6179 :
6180 0 : alglib_impl::lrreport* _lrreport_owner::c_ptr()
6181 : {
6182 0 : return p_struct;
6183 : }
6184 :
6185 0 : alglib_impl::lrreport* _lrreport_owner::c_ptr() const
6186 : {
6187 0 : return const_cast<alglib_impl::lrreport*>(p_struct);
6188 : }
6189 0 : lrreport::lrreport() : _lrreport_owner() ,c(&p_struct->c),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),cvrmserror(p_struct->cvrmserror),cvavgerror(p_struct->cvavgerror),cvavgrelerror(p_struct->cvavgrelerror),ncvdefects(p_struct->ncvdefects),cvdefects(&p_struct->cvdefects)
6190 : {
6191 0 : }
6192 :
6193 0 : lrreport::lrreport(const lrreport &rhs):_lrreport_owner(rhs) ,c(&p_struct->c),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),cvrmserror(p_struct->cvrmserror),cvavgerror(p_struct->cvavgerror),cvavgrelerror(p_struct->cvavgrelerror),ncvdefects(p_struct->ncvdefects),cvdefects(&p_struct->cvdefects)
6194 : {
6195 0 : }
6196 :
6197 0 : lrreport& lrreport::operator=(const lrreport &rhs)
6198 : {
6199 0 : if( this==&rhs )
6200 0 : return *this;
6201 0 : _lrreport_owner::operator=(rhs);
6202 0 : return *this;
6203 : }
6204 :
6205 0 : lrreport::~lrreport()
6206 : {
6207 0 : }
6208 :
6209 : /*************************************************************************
6210 : Linear regression
6211 :
6212 : Subroutine builds model:
6213 :
6214 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)
6215 :
6216 : and returns the model in ALGLIB format, the covariance matrix, training
6217 : set errors (rms, average, average relative) and the leave-one-out cross-
6218 : validation estimate of the generalization error. The CV estimate is
6219 : calculated using a fast algorithm with O(NPoints*NVars) complexity.
6220 :
6221 : When the covariance matrix is calculated, standard deviations of function
6222 : values are assumed to be equal to the RMS error on the training set.
6223 :
6224 : INPUT PARAMETERS:
6225 : XY - training set, array [0..NPoints-1,0..NVars]:
6226 : * NVars columns - independent variables
6227 : * last column - dependent variable
6228 : NPoints - training set size, NPoints>NVars+1
6229 : NVars - number of independent variables
6230 :
6231 : OUTPUT PARAMETERS:
6232 : Info - return code:
6233 : * -255, in case of unknown internal error
6234 : * -4, if the internal SVD subroutine hasn't converged
6235 : * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
6236 : * 1, if subroutine successfully finished
6237 : LM - linear model in the ALGLIB format. Use subroutines of
6238 : this unit to work with the model.
6239 : AR - additional results
6240 :
6241 :
6242 : -- ALGLIB --
6243 : Copyright 02.08.2008 by Bochkanov Sergey
6244 : *************************************************************************/
6245 0 : void lrbuild(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
6246 : {
6247 : jmp_buf _break_jump;
6248 : alglib_impl::ae_state _alglib_env_state;
6249 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6250 0 : if( setjmp(_break_jump) )
6251 : {
6252 : #if !defined(AE_NO_EXCEPTIONS)
6253 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6254 : #else
6255 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6256 : return;
6257 : #endif
6258 : }
6259 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6260 0 : if( _xparams.flags!=0x0 )
6261 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6262 0 : alglib_impl::lrbuild(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
6263 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6264 0 : return;
6265 : }
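 :
 : /*************************************************************************
 : Usage sketch for lrbuild()/lrunpack()/lrprocess(). The tiny dataset below
 : (one independent variable, five points scattered around Y=2*X+1) is a
 : made-up example, not part of the original unit.
 :
 :     #include "dataanalysis.h"
 :     using namespace alglib;
 :
 :     int main()
 :     {
 :         // last column holds the dependent variable
 :         real_2d_array xy = "[[1,3.1],[2,4.9],[3,7.2],[4,8.9],[5,11.1]]";
 :         ae_int_t info;
 :         linearmodel lm;
 :         lrreport rep;
 :         lrbuild(xy, 5, 1, info, lm, rep);  // NPoints=5, NVars=1
 :         if( info!=1 )
 :             return 1;                      // build failed
 :         real_1d_array coeffs;
 :         ae_int_t nvars;
 :         lrunpack(lm, coeffs, nvars); // coeffs[0]=slope, coeffs[1]=intercept
 :         real_1d_array pt = "[6]";
 :         double y = lrprocess(lm, pt); // prediction at X=6, roughly 13
 :         return 0;
 :     }
 : *************************************************************************/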
6266 :
6267 : /*************************************************************************
6268 : Linear regression
6269 :
6270 : Variant of LRBuild which uses a vector of standard deviations (errors in
6271 : function values).
6272 :
6273 : INPUT PARAMETERS:
6274 : XY - training set, array [0..NPoints-1,0..NVars]:
6275 : * NVars columns - independent variables
6276 : * last column - dependent variable
6277 : S - standard deviations (errors in function values)
6278 : array[0..NPoints-1], S[i]>0.
6279 : NPoints - training set size, NPoints>NVars+1
6280 : NVars - number of independent variables
6281 :
6282 : OUTPUT PARAMETERS:
6283 : Info - return code:
6284 : * -255, in case of unknown internal error
6285 : * -4, if the internal SVD subroutine hasn't converged
6286 : * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
6287 : * -2, if S[I]<=0
6288 : * 1, if subroutine successfully finished
6289 : LM - linear model in the ALGLIB format. Use subroutines of
6290 : this unit to work with the model.
6291 : AR - additional results
6292 :
6293 :
6294 : -- ALGLIB --
6295 : Copyright 02.08.2008 by Bochkanov Sergey
6296 : *************************************************************************/
6297 0 : void lrbuilds(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
6298 : {
6299 : jmp_buf _break_jump;
6300 : alglib_impl::ae_state _alglib_env_state;
6301 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6302 0 : if( setjmp(_break_jump) )
6303 : {
6304 : #if !defined(AE_NO_EXCEPTIONS)
6305 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6306 : #else
6307 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6308 : return;
6309 : #endif
6310 : }
6311 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6312 0 : if( _xparams.flags!=0x0 )
6313 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6314 0 : alglib_impl::lrbuilds(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(s.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
6315 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6316 0 : return;
6317 : }
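 :
 : /*************************************************************************
 : Usage sketch for lrbuilds(): the same task as the lrbuild() sketch above,
 : but with per-point standard deviations supplied in S. The deviation
 : values below are assumptions chosen for the example.
 :
 :     // xy as in the lrbuild() sketch; one positive deviation per point
 :     real_1d_array sigma = "[0.1,0.1,0.2,0.1,0.3]";
 :     ae_int_t info;
 :     linearmodel lm;
 :     lrreport rep;
 :     lrbuilds(xy, sigma, 5, 1, info, lm, rep);
 : *************************************************************************/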
6318 :
6319 : /*************************************************************************
6320 : Like LRBuildS, but builds model
6321 :
6322 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
6323 :
6324 : i.e. with zero constant term.
6325 :
6326 : -- ALGLIB --
6327 : Copyright 30.10.2008 by Bochkanov Sergey
6328 : *************************************************************************/
6329 0 : void lrbuildzs(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
6330 : {
6331 : jmp_buf _break_jump;
6332 : alglib_impl::ae_state _alglib_env_state;
6333 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6334 0 : if( setjmp(_break_jump) )
6335 : {
6336 : #if !defined(AE_NO_EXCEPTIONS)
6337 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6338 : #else
6339 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6340 : return;
6341 : #endif
6342 : }
6343 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6344 0 : if( _xparams.flags!=0x0 )
6345 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6346 0 : alglib_impl::lrbuildzs(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), const_cast<alglib_impl::ae_vector*>(s.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
6347 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6348 0 : return;
6349 : }
6350 :
6351 : /*************************************************************************
6352 : Like LRBuild but builds model
6353 :
6354 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
6355 :
6356 : i.e. with zero constant term.
6357 :
6358 : -- ALGLIB --
6359 : Copyright 30.10.2008 by Bochkanov Sergey
6360 : *************************************************************************/
6361 0 : void lrbuildz(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams)
6362 : {
6363 : jmp_buf _break_jump;
6364 : alglib_impl::ae_state _alglib_env_state;
6365 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6366 0 : if( setjmp(_break_jump) )
6367 : {
6368 : #if !defined(AE_NO_EXCEPTIONS)
6369 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6370 : #else
6371 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6372 : return;
6373 : #endif
6374 : }
6375 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6376 0 : if( _xparams.flags!=0x0 )
6377 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6378 0 : alglib_impl::lrbuildz(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, &info, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::lrreport*>(ar.c_ptr()), &_alglib_env_state);
6379 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6380 0 : return;
6381 : }
6382 :
6383 : /*************************************************************************
6384 : Unpacks coefficients of linear model.
6385 :
6386 : INPUT PARAMETERS:
6387 : LM - linear model in ALGLIB format
6388 :
6389 : OUTPUT PARAMETERS:
6390 : V - coefficients, array[0..NVars]
6391 : constant term (intercept) is stored in V[NVars].
6392 : NVars - number of independent variables (one less than number
6393 : of coefficients)
6394 :
6395 : -- ALGLIB --
6396 : Copyright 30.08.2008 by Bochkanov Sergey
6397 : *************************************************************************/
6398 0 : void lrunpack(const linearmodel &lm, real_1d_array &v, ae_int_t &nvars, const xparams _xparams)
6399 : {
6400 : jmp_buf _break_jump;
6401 : alglib_impl::ae_state _alglib_env_state;
6402 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6403 0 : if( setjmp(_break_jump) )
6404 : {
6405 : #if !defined(AE_NO_EXCEPTIONS)
6406 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6407 : #else
6408 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6409 : return;
6410 : #endif
6411 : }
6412 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6413 0 : if( _xparams.flags!=0x0 )
6414 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6415 0 : alglib_impl::lrunpack(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(v.c_ptr()), &nvars, &_alglib_env_state);
6416 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6417 0 : return;
6418 : }
6419 :
6420 : /*************************************************************************
6421 : "Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
6422 : reversed).
6423 :
6424 : INPUT PARAMETERS:
6425 : V - coefficients, array[0..NVars]
6426 : NVars - number of independent variables
6427 :
6428 : OUTPUT PARAMETERS:
6429 : LM - linear model.
6430 :
6431 : -- ALGLIB --
6432 : Copyright 30.08.2008 by Bochkanov Sergey
6433 : *************************************************************************/
6434 0 : void lrpack(const real_1d_array &v, const ae_int_t nvars, linearmodel &lm, const xparams _xparams)
6435 : {
6436 : jmp_buf _break_jump;
6437 : alglib_impl::ae_state _alglib_env_state;
6438 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6439 0 : if( setjmp(_break_jump) )
6440 : {
6441 : #if !defined(AE_NO_EXCEPTIONS)
6442 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6443 : #else
6444 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6445 : return;
6446 : #endif
6447 : }
6448 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6449 0 : if( _xparams.flags!=0x0 )
6450 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6451 0 : alglib_impl::lrpack(const_cast<alglib_impl::ae_vector*>(v.c_ptr()), nvars, const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), &_alglib_env_state);
6452 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6453 0 : return;
6454 : }
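
/*************************************************************************
Usage sketch for LRPack/LRUnpack (illustrative; coefficient values are
arbitrary). Packing V=[2,0,5] with NVars=2 creates the model
Y = 2*X[0] + 0*X[1] + 5, and unpacking recovers the same array:

    alglib::real_1d_array v("[2,0,5]");  // V[NVars]=5 is the intercept
    alglib::linearmodel lm;
    alglib::lrpack(v, 2, lm);
    alglib::real_1d_array v2;
    alglib::ae_int_t nvars;
    alglib::lrunpack(lm, v2, nvars);     // v2=[2,0,5], nvars=2
*************************************************************************/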
6455 :
6456 : /*************************************************************************
6457 : Processing
6458 :
6459 : INPUT PARAMETERS:
6460 : LM - linear model
6461 : X - input vector, array[0..NVars-1].
6462 :
6463 : Result:
6464 : value of linear model regression estimate
6465 :
6466 : -- ALGLIB --
6467 : Copyright 03.09.2008 by Bochkanov Sergey
6468 : *************************************************************************/
6469 0 : double lrprocess(const linearmodel &lm, const real_1d_array &x, const xparams _xparams)
6470 : {
6471 : jmp_buf _break_jump;
6472 : alglib_impl::ae_state _alglib_env_state;
6473 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6474 0 : if( setjmp(_break_jump) )
6475 : {
6476 : #if !defined(AE_NO_EXCEPTIONS)
6477 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6478 : #else
6479 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6480 : return 0;
6481 : #endif
6482 : }
6483 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6484 0 : if( _xparams.flags!=0x0 )
6485 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6486 0 : double result = alglib_impl::lrprocess(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
6487 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6488 0 : return *(reinterpret_cast<double*>(&result));
6489 : }
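
/*************************************************************************
Usage sketch for LRProcess (illustrative, continuing the LRPack sketch
above, where lm encodes Y = 2*X[0] + 0*X[1] + 5). Evaluating at X=(3,7)
gives 2*3 + 0*7 + 5 = 11:

    alglib::real_1d_array x("[3,7]");
    double y = alglib::lrprocess(lm, x);  // y = 11.0
*************************************************************************/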
6490 :
6491 : /*************************************************************************
6492 : RMS error on the test set
6493 :
6494 : INPUT PARAMETERS:
6495 : LM - linear model
6496 : XY - test set
6497 : NPoints - test set size
6498 :
6499 : RESULT:
6500 : root mean square error.
6501 :
6502 : -- ALGLIB --
6503 : Copyright 30.08.2008 by Bochkanov Sergey
6504 : *************************************************************************/
6505 0 : double lrrmserror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
6506 : {
6507 : jmp_buf _break_jump;
6508 : alglib_impl::ae_state _alglib_env_state;
6509 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6510 0 : if( setjmp(_break_jump) )
6511 : {
6512 : #if !defined(AE_NO_EXCEPTIONS)
6513 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6514 : #else
6515 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6516 : return 0;
6517 : #endif
6518 : }
6519 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6520 0 : if( _xparams.flags!=0x0 )
6521 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6522 0 : double result = alglib_impl::lrrmserror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
6523 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6524 0 : return *(reinterpret_cast<double*>(&result));
6525 : }
6526 :
6527 : /*************************************************************************
6528 : Average error on the test set
6529 :
6530 : INPUT PARAMETERS:
6531 : LM - linear model
6532 : XY - test set
6533 : NPoints - test set size
6534 :
6535 : RESULT:
6536 : average error.
6537 :
6538 : -- ALGLIB --
6539 : Copyright 30.08.2008 by Bochkanov Sergey
6540 : *************************************************************************/
6541 0 : double lravgerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
6542 : {
6543 : jmp_buf _break_jump;
6544 : alglib_impl::ae_state _alglib_env_state;
6545 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6546 0 : if( setjmp(_break_jump) )
6547 : {
6548 : #if !defined(AE_NO_EXCEPTIONS)
6549 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6550 : #else
6551 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6552 : return 0;
6553 : #endif
6554 : }
6555 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6556 0 : if( _xparams.flags!=0x0 )
6557 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6558 0 : double result = alglib_impl::lravgerror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
6559 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6560 0 : return *(reinterpret_cast<double*>(&result));
6561 : }
6562 :
6563 : /*************************************************************************
6564 : Average relative error on the test set
6565 :
6566 : INPUT PARAMETERS:
6567 : LM - linear model
6568 : XY - test set
6569 : NPoints - test set size
6570 :
6571 : RESULT:
6572 : average relative error.
6573 :
6574 : -- ALGLIB --
6575 : Copyright 30.08.2008 by Bochkanov Sergey
6576 : *************************************************************************/
6577 0 : double lravgrelerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
6578 : {
6579 : jmp_buf _break_jump;
6580 : alglib_impl::ae_state _alglib_env_state;
6581 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6582 0 : if( setjmp(_break_jump) )
6583 : {
6584 : #if !defined(AE_NO_EXCEPTIONS)
6585 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6586 : #else
6587 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6588 : return 0;
6589 : #endif
6590 : }
6591 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6592 0 : if( _xparams.flags!=0x0 )
6593 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6594 0 : double result = alglib_impl::lravgrelerror(const_cast<alglib_impl::linearmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
6595 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6596 0 : return *(reinterpret_cast<double*>(&result));
6597 : }
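
/*************************************************************************
Usage sketch for the three error metrics above (illustrative; lm is any
trained linear model and xy follows the training-set layout, with the
dependent variable in the last column):

    alglib::real_2d_array xy("[[1,2.1],[2,3.9],[3,6.2]]");
    double rms = alglib::lrrmserror(lm, xy, 3);     // sqrt of mean squared residual
    double avg = alglib::lravgerror(lm, xy, 3);     // mean absolute residual
    double rel = alglib::lravgrelerror(lm, xy, 3);  // mean relative residual
*************************************************************************/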
6598 : #endif
6599 :
6600 : #if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
6601 : /*************************************************************************
6602 : Filters: simple moving averages (unsymmetric).
6603 :
6604 : This filter replaces array by results of SMA(K) filter. SMA(K) is defined
6605 : as filter which averages at most K previous points (previous - not points
6606 : AROUND central point) - or less, in case of the first K-1 points.
6607 :
6608 : INPUT PARAMETERS:
6609 : X - array[N], array to process. It can be larger than N,
6610 : in this case only first N points are processed.
6611 : N - points count, N>=0
6612 : K - K>=1 (K can be larger than N, such cases will be
6613 : correctly handled). Window width. K=1 corresponds to
6614 : identity transformation (nothing changes).
6615 :
6616 : OUTPUT PARAMETERS:
6617 : X - array, whose first N elements were processed with SMA(K)
6618 :
6619 : NOTE 1: this function uses efficient in-place algorithm which does not
6620 : allocate temporary arrays.
6621 :
6622 : NOTE 2: this algorithm makes only one pass through array and uses running
6623 : sum to speed-up calculation of the averages. Additional measures
6624 : are taken to ensure that running sum on a long sequence of zero
6625 : elements will be correctly reset to zero even in the presence of
6626 : round-off error.
6627 :
6628 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
6629 :         average points after the current one. Only X[i], X[i-1], ... are
6630 :         used when calculating the new value of X[i]. Note also that this
6631 :         algorithm uses BOTH previous points and the current one, i.e. the
6632 :         new value of X[i] depends BOTH on previous points and on X[i] itself.
6633 :
6634 : -- ALGLIB --
6635 : Copyright 25.10.2011 by Bochkanov Sergey
6636 : *************************************************************************/
6637 0 : void filtersma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams)
6638 : {
6639 : jmp_buf _break_jump;
6640 : alglib_impl::ae_state _alglib_env_state;
6641 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6642 0 : if( setjmp(_break_jump) )
6643 : {
6644 : #if !defined(AE_NO_EXCEPTIONS)
6645 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6646 : #else
6647 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6648 : return;
6649 : #endif
6650 : }
6651 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6652 0 : if( _xparams.flags!=0x0 )
6653 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6654 0 : alglib_impl::filtersma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
6655 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6656 0 : return;
6657 : }
6658 :
6659 : /*************************************************************************
6660 : Filters: simple moving averages (unsymmetric).
6661 :
6662 : This filter replaces array by results of SMA(K) filter. SMA(K) is defined
6663 : as filter which averages at most K previous points (previous - not points
6664 : AROUND central point) - or less, in case of the first K-1 points.
6665 :
6666 : INPUT PARAMETERS:
6667 : X - array[N], array to process. It can be larger than N,
6668 : in this case only first N points are processed.
6669 : N - points count, N>=0
6670 : K - K>=1 (K can be larger than N, such cases will be
6671 : correctly handled). Window width. K=1 corresponds to
6672 : identity transformation (nothing changes).
6673 :
6674 : OUTPUT PARAMETERS:
6675 : X - array, whose first N elements were processed with SMA(K)
6676 :
6677 : NOTE 1: this function uses efficient in-place algorithm which does not
6678 : allocate temporary arrays.
6679 :
6680 : NOTE 2: this algorithm makes only one pass through array and uses running
6681 : sum to speed-up calculation of the averages. Additional measures
6682 : are taken to ensure that running sum on a long sequence of zero
6683 : elements will be correctly reset to zero even in the presence of
6684 : round-off error.
6685 :
6686 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
6687 :         average points after the current one. Only X[i], X[i-1], ... are
6688 :         used when calculating the new value of X[i]. Note also that this
6689 :         algorithm uses BOTH previous points and the current one, i.e. the
6690 :         new value of X[i] depends BOTH on previous points and on X[i] itself.
6691 :
6692 : -- ALGLIB --
6693 : Copyright 25.10.2011 by Bochkanov Sergey
6694 : *************************************************************************/
6695 : #if !defined(AE_NO_EXCEPTIONS)
6696 0 : void filtersma(real_1d_array &x, const ae_int_t k, const xparams _xparams)
6697 : {
6698 : jmp_buf _break_jump;
6699 : alglib_impl::ae_state _alglib_env_state;
6700 : ae_int_t n;
6701 :
6702 0 : n = x.length();
6703 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6704 0 : if( setjmp(_break_jump) )
6705 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6706 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6707 0 : if( _xparams.flags!=0x0 )
6708 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6709 0 : alglib_impl::filtersma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
6710 :
6711 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6712 0 : return;
6713 : }
6714 : #endif
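
/*************************************************************************
Usage sketch for FilterSMA (illustrative). With K=2 each element becomes
the average of itself and its predecessor, computed from the original
values, and the first element stays unchanged because only one point is
available:

    alglib::real_1d_array x("[5,6,7,8]");
    alglib::filtersma(x, 2);  // x becomes [5, 5.5, 6.5, 7.5]
*************************************************************************/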
6715 :
6716 : /*************************************************************************
6717 : Filters: exponential moving averages.
6718 :
6719 : This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
6720 : defined as filter which replaces X[] by S[]:
6721 : S[0] = X[0]
6722 : S[t] = alpha*X[t] + (1-alpha)*S[t-1]
6723 :
6724 : INPUT PARAMETERS:
6725 : X - array[N], array to process. It can be larger than N,
6726 : in this case only first N points are processed.
6727 : N - points count, N>=0
6728 : alpha - 0<alpha<=1, smoothing parameter.
6729 :
6730 : OUTPUT PARAMETERS:
6731 : X - array, whose first N elements were processed
6732 : with EMA(alpha)
6733 :
6734 : NOTE 1: this function uses efficient in-place algorithm which does not
6735 : allocate temporary arrays.
6736 :
6737 : NOTE 2: this algorithm uses BOTH previous points and the current one, i.e.
6738 :         the new value of X[i] depends BOTH on previous points and on X[i] itself.
6739 :
6740 : NOTE 3: technical analysis users quite often work with the EMA coefficient
6741 :         expressed in DAYS instead of fractions. If you want to calculate
6742 :         EMA(N), where N is a number of days, you can use alpha=2/(N+1).
6743 :
6744 : -- ALGLIB --
6745 : Copyright 25.10.2011 by Bochkanov Sergey
6746 : *************************************************************************/
6747 0 : void filterema(real_1d_array &x, const ae_int_t n, const double alpha, const xparams _xparams)
6748 : {
6749 : jmp_buf _break_jump;
6750 : alglib_impl::ae_state _alglib_env_state;
6751 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6752 0 : if( setjmp(_break_jump) )
6753 : {
6754 : #if !defined(AE_NO_EXCEPTIONS)
6755 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6756 : #else
6757 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6758 : return;
6759 : #endif
6760 : }
6761 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6762 0 : if( _xparams.flags!=0x0 )
6763 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6764 0 : alglib_impl::filterema(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, alpha, &_alglib_env_state);
6765 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6766 0 : return;
6767 : }
6768 :
6769 : /*************************************************************************
6770 : Filters: exponential moving averages.
6771 :
6772 : This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
6773 : defined as filter which replaces X[] by S[]:
6774 : S[0] = X[0]
6775 : S[t] = alpha*X[t] + (1-alpha)*S[t-1]
6776 :
6777 : INPUT PARAMETERS:
6778 : X - array[N], array to process. It can be larger than N,
6779 : in this case only first N points are processed.
6780 : N - points count, N>=0
6781 : alpha - 0<alpha<=1, smoothing parameter.
6782 :
6783 : OUTPUT PARAMETERS:
6784 : X - array, whose first N elements were processed
6785 : with EMA(alpha)
6786 :
6787 : NOTE 1: this function uses efficient in-place algorithm which does not
6788 : allocate temporary arrays.
6789 :
6790 : NOTE 2: this algorithm uses BOTH previous points and the current one, i.e.
6791 :         the new value of X[i] depends BOTH on previous points and on X[i] itself.
6792 :
6793 : NOTE 3: technical analysis users quite often work with the EMA coefficient
6794 :         expressed in DAYS instead of fractions. If you want to calculate
6795 :         EMA(N), where N is a number of days, you can use alpha=2/(N+1).
6796 :
6797 : -- ALGLIB --
6798 : Copyright 25.10.2011 by Bochkanov Sergey
6799 : *************************************************************************/
6800 : #if !defined(AE_NO_EXCEPTIONS)
6801 0 : void filterema(real_1d_array &x, const double alpha, const xparams _xparams)
6802 : {
6803 : jmp_buf _break_jump;
6804 : alglib_impl::ae_state _alglib_env_state;
6805 : ae_int_t n;
6806 :
6807 0 : n = x.length();
6808 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6809 0 : if( setjmp(_break_jump) )
6810 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6811 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6812 0 : if( _xparams.flags!=0x0 )
6813 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6814 0 : alglib_impl::filterema(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, alpha, &_alglib_env_state);
6815 :
6816 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6817 0 : return;
6818 : }
6819 : #endif
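
/*************************************************************************
Usage sketch for FilterEMA (illustrative). With alpha=0.5 the recurrence
S[t] = 0.5*X[t] + 0.5*S[t-1] produces the values shown below; per NOTE 3,
a trader-style N-day EMA corresponds to alpha=2/(N+1):

    alglib::real_1d_array x("[5,6,7,8]");
    alglib::filterema(x, 0.5);       // x becomes [5, 5.5, 6.25, 7.125]
    double alpha10 = 2.0/(10+1);     // coefficient for a 10-day EMA
*************************************************************************/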
6820 :
6821 : /*************************************************************************
6822 : Filters: linear regression moving averages.
6823 :
6824 : This filter replaces array by results of LRMA(K) filter.
6825 :
6826 : LRMA(K) is defined as filter which, for each data point, builds linear
6827 : regression model using K previous points (point itself is included in
6828 : these K points) and calculates value of this linear model at the point in
6829 : question.
6830 :
6831 : INPUT PARAMETERS:
6832 : X - array[N], array to process. It can be larger than N,
6833 : in this case only first N points are processed.
6834 : N - points count, N>=0
6835 : K - K>=1 (K can be larger than N, such cases will be
6836 : correctly handled). Window width. K=1 corresponds to
6837 : identity transformation (nothing changes).
6838 :
6839 : OUTPUT PARAMETERS:
6840 : X - array, whose first N elements were processed with LRMA(K)
6841 :
6842 : NOTE 1: this function uses efficient in-place algorithm which does not
6843 : allocate temporary arrays.
6844 :
6845 : NOTE 2: this algorithm makes only one pass through array and uses running
6846 : sum to speed-up calculation of the averages. Additional measures
6847 : are taken to ensure that running sum on a long sequence of zero
6848 : elements will be correctly reset to zero even in the presence of
6849 : round-off error.
6850 :
6851 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
6852 :         average points after the current one. Only X[i], X[i-1], ... are
6853 :         used when calculating the new value of X[i]. Note also that this
6854 :         algorithm uses BOTH previous points and the current one, i.e. the
6855 :         new value of X[i] depends BOTH on previous points and on X[i] itself.
6856 :
6857 : -- ALGLIB --
6858 : Copyright 25.10.2011 by Bochkanov Sergey
6859 : *************************************************************************/
6860 0 : void filterlrma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams)
6861 : {
6862 : jmp_buf _break_jump;
6863 : alglib_impl::ae_state _alglib_env_state;
6864 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6865 0 : if( setjmp(_break_jump) )
6866 : {
6867 : #if !defined(AE_NO_EXCEPTIONS)
6868 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6869 : #else
6870 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
6871 : return;
6872 : #endif
6873 : }
6874 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6875 0 : if( _xparams.flags!=0x0 )
6876 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6877 0 : alglib_impl::filterlrma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
6878 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6879 0 : return;
6880 : }
6881 :
6882 : /*************************************************************************
6883 : Filters: linear regression moving averages.
6884 :
6885 : This filter replaces array by results of LRMA(K) filter.
6886 :
6887 : LRMA(K) is defined as filter which, for each data point, builds linear
6888 : regression model using K previous points (point itself is included in
6889 : these K points) and calculates value of this linear model at the point in
6890 : question.
6891 :
6892 : INPUT PARAMETERS:
6893 : X - array[N], array to process. It can be larger than N,
6894 : in this case only first N points are processed.
6895 : N - points count, N>=0
6896 : K - K>=1 (K can be larger than N, such cases will be
6897 : correctly handled). Window width. K=1 corresponds to
6898 : identity transformation (nothing changes).
6899 :
6900 : OUTPUT PARAMETERS:
6901 : X - array, whose first N elements were processed with LRMA(K)
6902 :
6903 : NOTE 1: this function uses efficient in-place algorithm which does not
6904 : allocate temporary arrays.
6905 :
6906 : NOTE 2: this algorithm makes only one pass through array and uses running
6907 : sum to speed-up calculation of the averages. Additional measures
6908 : are taken to ensure that running sum on a long sequence of zero
6909 : elements will be correctly reset to zero even in the presence of
6910 : round-off error.
6911 :
6912 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
6913 :         average points after the current one. Only X[i], X[i-1], ... are
6914 :         used when calculating the new value of X[i]. Note also that this
6915 :         algorithm uses BOTH previous points and the current one, i.e. the
6916 :         new value of X[i] depends BOTH on previous points and on X[i] itself.
6917 :
6918 : -- ALGLIB --
6919 : Copyright 25.10.2011 by Bochkanov Sergey
6920 : *************************************************************************/
6921 : #if !defined(AE_NO_EXCEPTIONS)
6922 0 : void filterlrma(real_1d_array &x, const ae_int_t k, const xparams _xparams)
6923 : {
6924 : jmp_buf _break_jump;
6925 : alglib_impl::ae_state _alglib_env_state;
6926 : ae_int_t n;
6927 :
6928 0 : n = x.length();
6929 0 : alglib_impl::ae_state_init(&_alglib_env_state);
6930 0 : if( setjmp(_break_jump) )
6931 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
6932 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
6933 0 : if( _xparams.flags!=0x0 )
6934 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
6935 0 : alglib_impl::filterlrma(const_cast<alglib_impl::ae_vector*>(x.c_ptr()), n, k, &_alglib_env_state);
6936 :
6937 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
6938 0 : return;
6939 : }
6940 : #endif
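
/*************************************************************************
Usage sketch for FilterLRMA (illustrative). A line fitted through two
points passes through both exactly, so K=2 - like K=1 - leaves the data
unchanged; smoothing starts at K>=3. Assuming the fits use the original
data values, the last element below becomes the value at the current
point of the line fitted to (6,7,9), approximately 8.833:

    alglib::real_1d_array x("[5,6,7,9]");
    alglib::filterlrma(x, 3);  // x becomes approximately [5, 6, 7, 8.833]
*************************************************************************/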
6941 : #endif
6942 :
6943 : #if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
6944 : /*************************************************************************
6945 :
6946 : *************************************************************************/
6947 0 : _logitmodel_owner::_logitmodel_owner()
6948 : {
6949 : jmp_buf _break_jump;
6950 : alglib_impl::ae_state _state;
6951 :
6952 0 : alglib_impl::ae_state_init(&_state);
6953 0 : if( setjmp(_break_jump) )
6954 : {
6955 0 : if( p_struct!=NULL )
6956 : {
6957 0 : alglib_impl::_logitmodel_destroy(p_struct);
6958 0 : alglib_impl::ae_free(p_struct);
6959 : }
6960 0 : p_struct = NULL;
6961 : #if !defined(AE_NO_EXCEPTIONS)
6962 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6963 : #else
6964 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6965 : return;
6966 : #endif
6967 : }
6968 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6969 0 : p_struct = NULL;
6970 0 : p_struct = (alglib_impl::logitmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::logitmodel), &_state);
6971 0 : memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
6972 0 : alglib_impl::_logitmodel_init(p_struct, &_state, ae_false);
6973 0 : ae_state_clear(&_state);
6974 0 : }
6975 :
6976 0 : _logitmodel_owner::_logitmodel_owner(const _logitmodel_owner &rhs)
6977 : {
6978 : jmp_buf _break_jump;
6979 : alglib_impl::ae_state _state;
6980 :
6981 0 : alglib_impl::ae_state_init(&_state);
6982 0 : if( setjmp(_break_jump) )
6983 : {
6984 0 : if( p_struct!=NULL )
6985 : {
6986 0 : alglib_impl::_logitmodel_destroy(p_struct);
6987 0 : alglib_impl::ae_free(p_struct);
6988 : }
6989 0 : p_struct = NULL;
6990 : #if !defined(AE_NO_EXCEPTIONS)
6991 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
6992 : #else
6993 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
6994 : return;
6995 : #endif
6996 : }
6997 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
6998 0 : p_struct = NULL;
6999 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: logitmodel copy constructor failure (source is not initialized)", &_state);
7000 0 : p_struct = (alglib_impl::logitmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::logitmodel), &_state);
7001 0 : memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
7002 0 : alglib_impl::_logitmodel_init_copy(p_struct, const_cast<alglib_impl::logitmodel*>(rhs.p_struct), &_state, ae_false);
7003 0 : ae_state_clear(&_state);
7004 0 : }
7005 :
7006 0 : _logitmodel_owner& _logitmodel_owner::operator=(const _logitmodel_owner &rhs)
7007 : {
7008 0 : if( this==&rhs )
7009 0 : return *this;
7010 : jmp_buf _break_jump;
7011 : alglib_impl::ae_state _state;
7012 :
7013 0 : alglib_impl::ae_state_init(&_state);
7014 0 : if( setjmp(_break_jump) )
7015 : {
7016 : #if !defined(AE_NO_EXCEPTIONS)
7017 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7018 : #else
7019 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7020 : return *this;
7021 : #endif
7022 : }
7023 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7024 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: logitmodel assignment constructor failure (destination is not initialized)", &_state);
7025 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: logitmodel assignment constructor failure (source is not initialized)", &_state);
7026 0 : alglib_impl::_logitmodel_destroy(p_struct);
7027 0 : memset(p_struct, 0, sizeof(alglib_impl::logitmodel));
7028 0 : alglib_impl::_logitmodel_init_copy(p_struct, const_cast<alglib_impl::logitmodel*>(rhs.p_struct), &_state, ae_false);
7029 0 : ae_state_clear(&_state);
7030 0 : return *this;
7031 : }
7032 :
7033 0 : _logitmodel_owner::~_logitmodel_owner()
7034 : {
7035 0 : if( p_struct!=NULL )
7036 : {
7037 0 : alglib_impl::_logitmodel_destroy(p_struct);
7038 0 : ae_free(p_struct);
7039 : }
7040 0 : }
7041 :
7042 0 : alglib_impl::logitmodel* _logitmodel_owner::c_ptr()
7043 : {
7044 0 : return p_struct;
7045 : }
7046 :
7047 0 : alglib_impl::logitmodel* _logitmodel_owner::c_ptr() const
7048 : {
7049 0 : return const_cast<alglib_impl::logitmodel*>(p_struct);
7050 : }
7051 0 : logitmodel::logitmodel() : _logitmodel_owner()
7052 : {
7053 0 : }
7054 :
7055 0 : logitmodel::logitmodel(const logitmodel &rhs):_logitmodel_owner(rhs)
7056 : {
7057 0 : }
7058 :
7059 0 : logitmodel& logitmodel::operator=(const logitmodel &rhs)
7060 : {
7061 0 : if( this==&rhs )
7062 0 : return *this;
7063 0 : _logitmodel_owner::operator=(rhs);
7064 0 : return *this;
7065 : }
7066 :
7067 0 : logitmodel::~logitmodel()
7068 : {
7069 0 : }
7070 :
7071 :
7072 : /*************************************************************************
7073 : MNLReport structure contains information about training process:
7074 : * NGrad - number of gradient calculations
7075 : * NHess - number of Hessian calculations
7076 : *************************************************************************/
7077 0 : _mnlreport_owner::_mnlreport_owner()
7078 : {
7079 : jmp_buf _break_jump;
7080 : alglib_impl::ae_state _state;
7081 :
7082 0 : alglib_impl::ae_state_init(&_state);
7083 0 : if( setjmp(_break_jump) )
7084 : {
7085 0 : if( p_struct!=NULL )
7086 : {
7087 0 : alglib_impl::_mnlreport_destroy(p_struct);
7088 0 : alglib_impl::ae_free(p_struct);
7089 : }
7090 0 : p_struct = NULL;
7091 : #if !defined(AE_NO_EXCEPTIONS)
7092 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7093 : #else
7094 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7095 : return;
7096 : #endif
7097 : }
7098 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7099 0 : p_struct = NULL;
7100 0 : p_struct = (alglib_impl::mnlreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mnlreport), &_state);
7101 0 : memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
7102 0 : alglib_impl::_mnlreport_init(p_struct, &_state, ae_false);
7103 0 : ae_state_clear(&_state);
7104 0 : }
7105 :
7106 0 : _mnlreport_owner::_mnlreport_owner(const _mnlreport_owner &rhs)
7107 : {
7108 : jmp_buf _break_jump;
7109 : alglib_impl::ae_state _state;
7110 :
7111 0 : alglib_impl::ae_state_init(&_state);
7112 0 : if( setjmp(_break_jump) )
7113 : {
7114 0 : if( p_struct!=NULL )
7115 : {
7116 0 : alglib_impl::_mnlreport_destroy(p_struct);
7117 0 : alglib_impl::ae_free(p_struct);
7118 : }
7119 0 : p_struct = NULL;
7120 : #if !defined(AE_NO_EXCEPTIONS)
7121 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7122 : #else
7123 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7124 : return;
7125 : #endif
7126 : }
7127 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7128 0 : p_struct = NULL;
7129 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mnlreport copy constructor failure (source is not initialized)", &_state);
7130 0 : p_struct = (alglib_impl::mnlreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mnlreport), &_state);
7131 0 : memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
7132 0 : alglib_impl::_mnlreport_init_copy(p_struct, const_cast<alglib_impl::mnlreport*>(rhs.p_struct), &_state, ae_false);
7133 0 : ae_state_clear(&_state);
7134 0 : }
7135 :
7136 0 : _mnlreport_owner& _mnlreport_owner::operator=(const _mnlreport_owner &rhs)
7137 : {
7138 0 : if( this==&rhs )
7139 0 : return *this;
7140 : jmp_buf _break_jump;
7141 : alglib_impl::ae_state _state;
7142 :
7143 0 : alglib_impl::ae_state_init(&_state);
7144 0 : if( setjmp(_break_jump) )
7145 : {
7146 : #if !defined(AE_NO_EXCEPTIONS)
7147 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7148 : #else
7149 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7150 : return *this;
7151 : #endif
7152 : }
7153 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7154 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mnlreport assignment constructor failure (destination is not initialized)", &_state);
7155 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mnlreport assignment constructor failure (source is not initialized)", &_state);
7156 0 : alglib_impl::_mnlreport_destroy(p_struct);
7157 0 : memset(p_struct, 0, sizeof(alglib_impl::mnlreport));
7158 0 : alglib_impl::_mnlreport_init_copy(p_struct, const_cast<alglib_impl::mnlreport*>(rhs.p_struct), &_state, ae_false);
7159 0 : ae_state_clear(&_state);
7160 0 : return *this;
7161 : }
7162 :
7163 0 : _mnlreport_owner::~_mnlreport_owner()
7164 : {
7165 0 : if( p_struct!=NULL )
7166 : {
7167 0 : alglib_impl::_mnlreport_destroy(p_struct);
7168 0 : ae_free(p_struct);
7169 : }
7170 0 : }
7171 :
7172 0 : alglib_impl::mnlreport* _mnlreport_owner::c_ptr()
7173 : {
7174 0 : return p_struct;
7175 : }
7176 :
7177 0 : alglib_impl::mnlreport* _mnlreport_owner::c_ptr() const
7178 : {
7179 0 : return const_cast<alglib_impl::mnlreport*>(p_struct);
7180 : }
7181 0 : mnlreport::mnlreport() : _mnlreport_owner() ,ngrad(p_struct->ngrad),nhess(p_struct->nhess)
7182 : {
7183 0 : }
7184 :
7185 0 : mnlreport::mnlreport(const mnlreport &rhs):_mnlreport_owner(rhs) ,ngrad(p_struct->ngrad),nhess(p_struct->nhess)
7186 : {
7187 0 : }
7188 :
7189 0 : mnlreport& mnlreport::operator=(const mnlreport &rhs)
7190 : {
7191 0 : if( this==&rhs )
7192 0 : return *this;
7193 0 : _mnlreport_owner::operator=(rhs);
7194 0 : return *this;
7195 : }
7196 :
7197 0 : mnlreport::~mnlreport()
7198 : {
7199 0 : }
7200 :
7201 : /*************************************************************************
7202 : This subroutine trains logit model.
7203 :
7204 : INPUT PARAMETERS:
7205 : XY - training set, array[0..NPoints-1,0..NVars]
7206 : First NVars columns store values of independent
7207 : variables, the next column stores the class number (from 0
7208 : to NClasses-1) to which the dataset element belongs. Fractional
7209 : values are rounded to the nearest integer.
7210 : NPoints - training set size, NPoints>=1
7211 : NVars - number of independent variables, NVars>=1
7212 : NClasses - number of classes, NClasses>=2
7213 :
7214 : OUTPUT PARAMETERS:
7215 : Info - return code:
7216 : * -2, if there is a point with class number
7217 : outside of [0..NClasses-1].
7218 : * -1, if incorrect parameters were passed
7219 : (NPoints<NVars+2, NVars<1, NClasses<2).
7220 : * 1, if task has been solved
7221 : LM - model built
7222 : Rep - training report
7223 :
7224 : -- ALGLIB --
7225 : Copyright 10.09.2008 by Bochkanov Sergey
7226 : *************************************************************************/
7227 0 : void mnltrainh(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, logitmodel &lm, mnlreport &rep, const xparams _xparams)
7228 : {
7229 : jmp_buf _break_jump;
7230 : alglib_impl::ae_state _alglib_env_state;
7231 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7232 0 : if( setjmp(_break_jump) )
7233 : {
7234 : #if !defined(AE_NO_EXCEPTIONS)
7235 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7236 : #else
7237 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7238 : return;
7239 : #endif
7240 : }
7241 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7242 0 : if( _xparams.flags!=0x0 )
7243 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7244 0 : alglib_impl::mnltrainh(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &info, const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::mnlreport*>(rep.c_ptr()), &_alglib_env_state);
7245 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7246 0 : return;
7247 : }
7248 :
7249 : /*************************************************************************
7250 : Processing
7251 :
7252 : INPUT PARAMETERS:
7253 : LM - logit model, passed by non-constant reference
7254 : (some fields of structure are used as temporaries
7255 : when calculating model output).
7256 : X - input vector, array[0..NVars-1].
7257 : Y - (possibly) preallocated buffer; if size of Y is less than
7258 : NClasses, it will be reallocated. If it is large enough, it
7259 : is NOT reallocated, so we can save some time on reallocation.
7260 :
7261 : OUTPUT PARAMETERS:
7262 : Y - result, array[0..NClasses-1]
7263 : Vector of posterior probabilities for classification task.
7264 :
7265 : -- ALGLIB --
7266 : Copyright 10.09.2008 by Bochkanov Sergey
7267 : *************************************************************************/
7268 0 : void mnlprocess(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
7269 : {
7270 : jmp_buf _break_jump;
7271 : alglib_impl::ae_state _alglib_env_state;
7272 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7273 0 : if( setjmp(_break_jump) )
7274 : {
7275 : #if !defined(AE_NO_EXCEPTIONS)
7276 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7277 : #else
7278 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7279 : return;
7280 : #endif
7281 : }
7282 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7283 0 : if( _xparams.flags!=0x0 )
7284 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7285 0 : alglib_impl::mnlprocess(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
7286 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7287 0 : return;
7288 : }
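
/*************************************************************************
Usage sketch for MNLTrainH/MNLProcess (illustrative synthetic dataset).
Two independent variables, two classes; the last column of XY holds the
class index, and NPoints>=NVars+2 is satisfied:

    alglib::real_2d_array xy("[[0,0,0],[0,1,0],[1,0,1],[1,1,1]]");
    alglib::ae_int_t info;
    alglib::logitmodel lm;
    alglib::mnlreport rep;
    alglib::mnltrainh(xy, 4, 2, 2, info, lm, rep);  // info==1 on success
    alglib::real_1d_array x("[1,0]"), y;
    alglib::mnlprocess(lm, x, y);  // y[0..1] = posterior probabilities
*************************************************************************/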
7289 :
7290 : /*************************************************************************
7291 : 'interactive' variant of MNLProcess for languages like Python which
7292 : support constructs like "Y = MNLProcess(LM,X)" and interactive mode of the
7293 : interpreter
7294 :
7295 : This function allocates new array on each call, so it is significantly
7296 : slower than its 'non-interactive' counterpart, but it is more convenient
7297 : when you call it from command line.
7298 :
7299 : -- ALGLIB --
7300 : Copyright 10.09.2008 by Bochkanov Sergey
7301 : *************************************************************************/
7302 0 : void mnlprocessi(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
7303 : {
7304 : jmp_buf _break_jump;
7305 : alglib_impl::ae_state _alglib_env_state;
7306 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7307 0 : if( setjmp(_break_jump) )
7308 : {
7309 : #if !defined(AE_NO_EXCEPTIONS)
7310 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7311 : #else
7312 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7313 : return;
7314 : #endif
7315 : }
7316 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7317 0 : if( _xparams.flags!=0x0 )
7318 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7319 0 : alglib_impl::mnlprocessi(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
7320 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7321 0 : return;
7322 : }
7323 :
7324 : /*************************************************************************
7325 : Unpacks coefficients of the logit model. The logit model has the form:
7326 :
7327 : P(class=i) = S(i) / (S(0) + S(1) + ... + S(M-1))
7328 : S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
7329 : S(M-1) = 1
7330 :
7331 : INPUT PARAMETERS:
7332 : LM - logit model in ALGLIB format
7333 :
7334 : OUTPUT PARAMETERS:
7335 : V - coefficients, array[0..NClasses-2,0..NVars]
7336 : NVars - number of independent variables
7337 : NClasses - number of classes
7338 :
7339 : -- ALGLIB --
7340 : Copyright 10.09.2008 by Bochkanov Sergey
7341 : *************************************************************************/
7342 0 : void mnlunpack(const logitmodel &lm, real_2d_array &a, ae_int_t &nvars, ae_int_t &nclasses, const xparams _xparams)
7343 : {
7344 : jmp_buf _break_jump;
7345 : alglib_impl::ae_state _alglib_env_state;
7346 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7347 0 : if( setjmp(_break_jump) )
7348 : {
7349 : #if !defined(AE_NO_EXCEPTIONS)
7350 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7351 : #else
7352 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7353 : return;
7354 : #endif
7355 : }
7356 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7357 0 : if( _xparams.flags!=0x0 )
7358 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7359 0 : alglib_impl::mnlunpack(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), &nvars, &nclasses, &_alglib_env_state);
7360 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7361 0 : return;
7362 : }
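
/*************************************************************************
Worked sketch of the formula above (illustrative numbers). For NVars=1,
NClasses=2, suppose unpacking yields A=[[2,-1]], so that
S(0)=Exp(2*X[0]-1) and S(1)=1. At X[0]=0.5 we get S(0)=Exp(0)=1, and
therefore P(class=0)=P(class=1)=1/(1+1)=0.5:

    alglib::real_2d_array a;
    alglib::ae_int_t nvars, nclasses;
    alglib::mnlunpack(lm, a, nvars, nclasses);  // a is (NClasses-1) x (NVars+1)
*************************************************************************/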
7363 :
7364 : /*************************************************************************
7365 : "Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
7366 : reversed).
7367 :
7368 : INPUT PARAMETERS:
7369 : A - model (see MNLUnpack)
7370 : NVars - number of independent variables
7371 : NClasses - number of classes
7372 :
7373 : OUTPUT PARAMETERS:
7374 : LM - logit model.
7375 :
7376 : -- ALGLIB --
7377 : Copyright 10.09.2008 by Bochkanov Sergey
7378 : *************************************************************************/
7379 0 : void mnlpack(const real_2d_array &a, const ae_int_t nvars, const ae_int_t nclasses, logitmodel &lm, const xparams _xparams)
7380 : {
7381 : jmp_buf _break_jump;
7382 : alglib_impl::ae_state _alglib_env_state;
7383 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7384 0 : if( setjmp(_break_jump) )
7385 : {
7386 : #if !defined(AE_NO_EXCEPTIONS)
7387 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7388 : #else
7389 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7390 : return;
7391 : #endif
7392 : }
7393 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7394 0 : if( _xparams.flags!=0x0 )
7395 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7396 0 : alglib_impl::mnlpack(const_cast<alglib_impl::ae_matrix*>(a.c_ptr()), nvars, nclasses, const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), &_alglib_env_state);
7397 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7398 0 : return;
7399 : }
7400 :
7401 : /*************************************************************************
7402 : Average cross-entropy (in bits per element) on the test set
7403 :
7404 : INPUT PARAMETERS:
7405 : LM - logit model
7406 : XY - test set
7407 : NPoints - test set size
7408 :
7409 : RESULT:
7410 : CrossEntropy/(NPoints*ln(2)).
7411 :
7412 : -- ALGLIB --
7413 : Copyright 10.09.2008 by Bochkanov Sergey
7414 : *************************************************************************/
7415 0 : double mnlavgce(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
7416 : {
7417 : jmp_buf _break_jump;
7418 : alglib_impl::ae_state _alglib_env_state;
7419 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7420 0 : if( setjmp(_break_jump) )
7421 : {
7422 : #if !defined(AE_NO_EXCEPTIONS)
7423 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7424 : #else
7425 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7426 : return 0;
7427 : #endif
7428 : }
7429 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7430 0 : if( _xparams.flags!=0x0 )
7431 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7432 0 : double result = alglib_impl::mnlavgce(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
7433 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7434 0 : return *(reinterpret_cast<double*>(&result));
7435 : }
7436 :
7437 : /*************************************************************************
7438 : Relative classification error on the test set
7439 :
7440 : INPUT PARAMETERS:
7441 : LM - logit model
7442 : XY - test set
7443 : NPoints - test set size
7444 :
7445 : RESULT:
7446 : percent of incorrectly classified cases.
7447 :
7448 : -- ALGLIB --
7449 : Copyright 10.09.2008 by Bochkanov Sergey
7450 : *************************************************************************/
7451 0 : double mnlrelclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
7452 : {
7453 : jmp_buf _break_jump;
7454 : alglib_impl::ae_state _alglib_env_state;
7455 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7456 0 : if( setjmp(_break_jump) )
7457 : {
7458 : #if !defined(AE_NO_EXCEPTIONS)
7459 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7460 : #else
7461 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7462 : return 0;
7463 : #endif
7464 : }
7465 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7466 0 : if( _xparams.flags!=0x0 )
7467 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7468 0 : double result = alglib_impl::mnlrelclserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
7469 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7470 0 : return *(reinterpret_cast<double*>(&result));
7471 : }
7472 :
7473 : /*************************************************************************
7474 : RMS error on the test set
7475 :
7476 : INPUT PARAMETERS:
7477 : LM - logit model
7478 : XY - test set
7479 : NPoints - test set size
7480 :
7481 : RESULT:
7482 : root mean square error (error when estimating posterior probabilities).
7483 :
7484 : -- ALGLIB --
7485 : Copyright 30.08.2008 by Bochkanov Sergey
7486 : *************************************************************************/
7487 0 : double mnlrmserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
7488 : {
7489 : jmp_buf _break_jump;
7490 : alglib_impl::ae_state _alglib_env_state;
7491 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7492 0 : if( setjmp(_break_jump) )
7493 : {
7494 : #if !defined(AE_NO_EXCEPTIONS)
7495 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7496 : #else
7497 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7498 : return 0;
7499 : #endif
7500 : }
7501 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7502 0 : if( _xparams.flags!=0x0 )
7503 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7504 0 : double result = alglib_impl::mnlrmserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
7505 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7506 0 : return *(reinterpret_cast<double*>(&result));
7507 : }
7508 :
7509 : /*************************************************************************
7510 : Average error on the test set
7511 :
7512 : INPUT PARAMETERS:
7513 : LM - logit model
7514 : XY - test set
7515 : NPoints - test set size
7516 :
7517 : RESULT:
7518 : average error (error when estimating posterior probabilities).
7519 :
7520 : -- ALGLIB --
7521 : Copyright 30.08.2008 by Bochkanov Sergey
7522 : *************************************************************************/
7523 0 : double mnlavgerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
7524 : {
7525 : jmp_buf _break_jump;
7526 : alglib_impl::ae_state _alglib_env_state;
7527 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7528 0 : if( setjmp(_break_jump) )
7529 : {
7530 : #if !defined(AE_NO_EXCEPTIONS)
7531 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7532 : #else
7533 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7534 : return 0;
7535 : #endif
7536 : }
7537 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7538 0 : if( _xparams.flags!=0x0 )
7539 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7540 0 : double result = alglib_impl::mnlavgerror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
7541 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7542 0 : return *(reinterpret_cast<double*>(&result));
7543 : }
7544 :
7545 : /*************************************************************************
7546 : Average relative error on the test set
7547 :
7548 : INPUT PARAMETERS:
7549 : LM - logit model
7550 : XY - test set
7551 : NPoints - test set size
7552 :
7553 : RESULT:
7554 : average relative error (error when estimating posterior probabilities).
7555 :
7556 : -- ALGLIB --
7557 : Copyright 30.08.2008 by Bochkanov Sergey
7558 : *************************************************************************/
7559 0 : double mnlavgrelerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams)
7560 : {
7561 : jmp_buf _break_jump;
7562 : alglib_impl::ae_state _alglib_env_state;
7563 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7564 0 : if( setjmp(_break_jump) )
7565 : {
7566 : #if !defined(AE_NO_EXCEPTIONS)
7567 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7568 : #else
7569 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7570 : return 0;
7571 : #endif
7572 : }
7573 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7574 0 : if( _xparams.flags!=0x0 )
7575 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7576 0 : double result = alglib_impl::mnlavgrelerror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), ssize, &_alglib_env_state);
7577 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7578 0 : return *(reinterpret_cast<double*>(&result));
7579 : }
7580 :
7581 : /*************************************************************************
7582 : Classification error on test set = MNLRelClsError*NPoints
7583 :
7584 : -- ALGLIB --
7585 : Copyright 10.09.2008 by Bochkanov Sergey
7586 : *************************************************************************/
7587 0 : ae_int_t mnlclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
7588 : {
7589 : jmp_buf _break_jump;
7590 : alglib_impl::ae_state _alglib_env_state;
7591 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7592 0 : if( setjmp(_break_jump) )
7593 : {
7594 : #if !defined(AE_NO_EXCEPTIONS)
7595 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7596 : #else
7597 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7598 : return 0;
7599 : #endif
7600 : }
7601 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7602 0 : if( _xparams.flags!=0x0 )
7603 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7604 0 : alglib_impl::ae_int_t result = alglib_impl::mnlclserror(const_cast<alglib_impl::logitmodel*>(lm.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
7605 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7606 0 : return *(reinterpret_cast<ae_int_t*>(&result));
7607 : }
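
/*************************************************************************
Usage sketch for the logit error metrics above (illustrative; lm and xy
as in the MNLTrainH sketch). Note the identity stated for MNLClsError:

    double ce  = alglib::mnlavgce(lm, xy, 4);        // bits per element
    double rel = alglib::mnlrelclserror(lm, xy, 4);  // relative classification error
    alglib::ae_int_t n = alglib::mnlclserror(lm, xy, 4);
    // n equals rel*4, i.e. MNLRelClsError*NPoints
*************************************************************************/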
7608 : #endif
7609 :
7610 : #if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
7611 : /*************************************************************************
7612 : This structure is an MCPD (Markov Chains for Population Data) solver.
7613 :
7614 : You should use ALGLIB functions in order to work with this object.
7615 :
7616 : -- ALGLIB --
7617 : Copyright 23.05.2010 by Bochkanov Sergey
7618 : *************************************************************************/
7619 0 : _mcpdstate_owner::_mcpdstate_owner()
7620 : {
7621 : jmp_buf _break_jump;
7622 : alglib_impl::ae_state _state;
7623 :
7624 0 : alglib_impl::ae_state_init(&_state);
7625 0 : if( setjmp(_break_jump) )
7626 : {
7627 0 : if( p_struct!=NULL )
7628 : {
7629 0 : alglib_impl::_mcpdstate_destroy(p_struct);
7630 0 : alglib_impl::ae_free(p_struct);
7631 : }
7632 0 : p_struct = NULL;
7633 : #if !defined(AE_NO_EXCEPTIONS)
7634 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7635 : #else
7636 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7637 : return;
7638 : #endif
7639 : }
7640 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7641 0 : p_struct = NULL;
7642 0 : p_struct = (alglib_impl::mcpdstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdstate), &_state);
7643 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
7644 0 : alglib_impl::_mcpdstate_init(p_struct, &_state, ae_false);
7645 0 : ae_state_clear(&_state);
7646 0 : }
7647 :
7648 0 : _mcpdstate_owner::_mcpdstate_owner(const _mcpdstate_owner &rhs)
7649 : {
7650 : jmp_buf _break_jump;
7651 : alglib_impl::ae_state _state;
7652 :
7653 0 : alglib_impl::ae_state_init(&_state);
7654 0 : if( setjmp(_break_jump) )
7655 : {
7656 0 : if( p_struct!=NULL )
7657 : {
7658 0 : alglib_impl::_mcpdstate_destroy(p_struct);
7659 0 : alglib_impl::ae_free(p_struct);
7660 : }
7661 0 : p_struct = NULL;
7662 : #if !defined(AE_NO_EXCEPTIONS)
7663 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7664 : #else
7665 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7666 : return;
7667 : #endif
7668 : }
7669 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7670 0 : p_struct = NULL;
7671 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdstate copy constructor failure (source is not initialized)", &_state);
7672 0 : p_struct = (alglib_impl::mcpdstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdstate), &_state);
7673 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
7674 0 : alglib_impl::_mcpdstate_init_copy(p_struct, const_cast<alglib_impl::mcpdstate*>(rhs.p_struct), &_state, ae_false);
7675 0 : ae_state_clear(&_state);
7676 0 : }
7677 :
7678 0 : _mcpdstate_owner& _mcpdstate_owner::operator=(const _mcpdstate_owner &rhs)
7679 : {
7680 0 : if( this==&rhs )
7681 0 : return *this;
7682 : jmp_buf _break_jump;
7683 : alglib_impl::ae_state _state;
7684 :
7685 0 : alglib_impl::ae_state_init(&_state);
7686 0 : if( setjmp(_break_jump) )
7687 : {
7688 : #if !defined(AE_NO_EXCEPTIONS)
7689 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7690 : #else
7691 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7692 : return *this;
7693 : #endif
7694 : }
7695 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7696 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mcpdstate assignment constructor failure (destination is not initialized)", &_state);
7697 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdstate assignment constructor failure (source is not initialized)", &_state);
7698 0 : alglib_impl::_mcpdstate_destroy(p_struct);
7699 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdstate));
7700 0 : alglib_impl::_mcpdstate_init_copy(p_struct, const_cast<alglib_impl::mcpdstate*>(rhs.p_struct), &_state, ae_false);
7701 0 : ae_state_clear(&_state);
7702 0 : return *this;
7703 : }
7704 :
7705 0 : _mcpdstate_owner::~_mcpdstate_owner()
7706 : {
7707 0 : if( p_struct!=NULL )
7708 : {
7709 0 : alglib_impl::_mcpdstate_destroy(p_struct);
7710 0 : ae_free(p_struct);
7711 : }
7712 0 : }
7713 :
7714 0 : alglib_impl::mcpdstate* _mcpdstate_owner::c_ptr()
7715 : {
7716 0 : return p_struct;
7717 : }
7718 :
7719 0 : alglib_impl::mcpdstate* _mcpdstate_owner::c_ptr() const
7720 : {
7721 0 : return const_cast<alglib_impl::mcpdstate*>(p_struct);
7722 : }
7723 0 : mcpdstate::mcpdstate() : _mcpdstate_owner()
7724 : {
7725 0 : }
7726 :
7727 0 : mcpdstate::mcpdstate(const mcpdstate &rhs):_mcpdstate_owner(rhs)
7728 : {
7729 0 : }
7730 :
7731 0 : mcpdstate& mcpdstate::operator=(const mcpdstate &rhs)
7732 : {
7733 0 : if( this==&rhs )
7734 0 : return *this;
7735 0 : _mcpdstate_owner::operator=(rhs);
7736 0 : return *this;
7737 : }
7738 :
7739 0 : mcpdstate::~mcpdstate()
7740 : {
7741 0 : }
7742 :
7743 :
7744 : /*************************************************************************
7745 : This structure is an MCPD training report:
7746 : InnerIterationsCount - number of inner iterations of the
7747 : underlying optimization algorithm
7748 : OuterIterationsCount - number of outer iterations of the
7749 : underlying optimization algorithm
7750 : NFEV - number of merit function evaluations
7751 : TerminationType - termination type
7752 : (same as for MinBLEIC optimizer, positive
7753 : values denote success, negative ones -
7754 : failure)
7755 :
7756 : -- ALGLIB --
7757 : Copyright 23.05.2010 by Bochkanov Sergey
7758 : *************************************************************************/
7759 0 : _mcpdreport_owner::_mcpdreport_owner()
7760 : {
7761 : jmp_buf _break_jump;
7762 : alglib_impl::ae_state _state;
7763 :
7764 0 : alglib_impl::ae_state_init(&_state);
7765 0 : if( setjmp(_break_jump) )
7766 : {
7767 0 : if( p_struct!=NULL )
7768 : {
7769 0 : alglib_impl::_mcpdreport_destroy(p_struct);
7770 0 : alglib_impl::ae_free(p_struct);
7771 : }
7772 0 : p_struct = NULL;
7773 : #if !defined(AE_NO_EXCEPTIONS)
7774 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7775 : #else
7776 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7777 : return;
7778 : #endif
7779 : }
7780 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7781 0 : p_struct = NULL;
7782 0 : p_struct = (alglib_impl::mcpdreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdreport), &_state);
7783 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
7784 0 : alglib_impl::_mcpdreport_init(p_struct, &_state, ae_false);
7785 0 : ae_state_clear(&_state);
7786 0 : }
7787 :
7788 0 : _mcpdreport_owner::_mcpdreport_owner(const _mcpdreport_owner &rhs)
7789 : {
7790 : jmp_buf _break_jump;
7791 : alglib_impl::ae_state _state;
7792 :
7793 0 : alglib_impl::ae_state_init(&_state);
7794 0 : if( setjmp(_break_jump) )
7795 : {
7796 0 : if( p_struct!=NULL )
7797 : {
7798 0 : alglib_impl::_mcpdreport_destroy(p_struct);
7799 0 : alglib_impl::ae_free(p_struct);
7800 : }
7801 0 : p_struct = NULL;
7802 : #if !defined(AE_NO_EXCEPTIONS)
7803 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7804 : #else
7805 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7806 : return;
7807 : #endif
7808 : }
7809 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7810 0 : p_struct = NULL;
7811 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdreport copy constructor failure (source is not initialized)", &_state);
7812 0 : p_struct = (alglib_impl::mcpdreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mcpdreport), &_state);
7813 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
7814 0 : alglib_impl::_mcpdreport_init_copy(p_struct, const_cast<alglib_impl::mcpdreport*>(rhs.p_struct), &_state, ae_false);
7815 0 : ae_state_clear(&_state);
7816 0 : }
7817 :
7818 0 : _mcpdreport_owner& _mcpdreport_owner::operator=(const _mcpdreport_owner &rhs)
7819 : {
7820 0 : if( this==&rhs )
7821 0 : return *this;
7822 : jmp_buf _break_jump;
7823 : alglib_impl::ae_state _state;
7824 :
7825 0 : alglib_impl::ae_state_init(&_state);
7826 0 : if( setjmp(_break_jump) )
7827 : {
7828 : #if !defined(AE_NO_EXCEPTIONS)
7829 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7830 : #else
7831 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7832 : return *this;
7833 : #endif
7834 : }
7835 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7836 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mcpdreport assignment constructor failure (destination is not initialized)", &_state);
7837 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mcpdreport assignment constructor failure (source is not initialized)", &_state);
7838 0 : alglib_impl::_mcpdreport_destroy(p_struct);
7839 0 : memset(p_struct, 0, sizeof(alglib_impl::mcpdreport));
7840 0 : alglib_impl::_mcpdreport_init_copy(p_struct, const_cast<alglib_impl::mcpdreport*>(rhs.p_struct), &_state, ae_false);
7841 0 : ae_state_clear(&_state);
7842 0 : return *this;
7843 : }
7844 :
7845 0 : _mcpdreport_owner::~_mcpdreport_owner()
7846 : {
7847 0 : if( p_struct!=NULL )
7848 : {
7849 0 : alglib_impl::_mcpdreport_destroy(p_struct);
7850 0 : ae_free(p_struct);
7851 : }
7852 0 : }
7853 :
7854 0 : alglib_impl::mcpdreport* _mcpdreport_owner::c_ptr()
7855 : {
7856 0 : return p_struct;
7857 : }
7858 :
7859 0 : alglib_impl::mcpdreport* _mcpdreport_owner::c_ptr() const
7860 : {
7861 0 : return const_cast<alglib_impl::mcpdreport*>(p_struct);
7862 : }
7863 0 : mcpdreport::mcpdreport() : _mcpdreport_owner() ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
7864 : {
7865 0 : }
7866 :
7867 0 : mcpdreport::mcpdreport(const mcpdreport &rhs):_mcpdreport_owner(rhs) ,inneriterationscount(p_struct->inneriterationscount),outeriterationscount(p_struct->outeriterationscount),nfev(p_struct->nfev),terminationtype(p_struct->terminationtype)
7868 : {
7869 0 : }
7870 :
7871 0 : mcpdreport& mcpdreport::operator=(const mcpdreport &rhs)
7872 : {
7873 0 : if( this==&rhs )
7874 0 : return *this;
7875 0 : _mcpdreport_owner::operator=(rhs);
7876 0 : return *this;
7877 : }
7878 :
7879 0 : mcpdreport::~mcpdreport()
7880 : {
7881 0 : }
7882 :
7883 : /*************************************************************************
7884 : DESCRIPTION:
7885 :
7886 : This function creates MCPD (Markov Chains for Population Data) solver.
7887 :
7888 : This solver can be used to find transition matrix P for N-dimensional
7889 : prediction problem where transition from X[i] to X[i+1] is modelled as
7890 : X[i+1] = P*X[i]
7891 : where X[i] and X[i+1] are N-dimensional population vectors (components of
7892 : each X are non-negative), and P is a N*N transition matrix (elements of P
7893 : are non-negative, each column sums to 1.0).
7894 :
7895 : Such models arise when:
7896 : * there is some population of individuals
7897 : * individuals can have different states
7898 : * individuals can transit from one state to another
7899 : * population size is constant, i.e. there is no new individuals and no one
7900 : leaves population
7901 : * you want to model transitions of individuals from one state into another
7902 :
7903 : USAGE:
7904 :
7905 : Here we give a very brief outline of MCPD. We strongly recommend that you
7906 : read the examples in the ALGLIB Reference Manual and the ALGLIB User Guide
7907 : on data analysis, which is available at http://www.alglib.net/dataanalysis/
7908 :
7909 : 1. User initializes algorithm state with MCPDCreate() call
7910 :
7911 : 2. User adds one or more tracks - sequences of states which describe
7912 : evolution of a system being modelled from different starting conditions
7913 :
7914 : 3. User may add optional boundary, equality and/or linear constraints on
7915 : the coefficients of P by calling one of the following functions:
7916 : * MCPDSetEC() to set equality constraints
7917 : * MCPDSetBC() to set bound constraints
7918 : * MCPDSetLC() to set linear constraints
7919 :
7920 : 4. Optionally, user may set custom weights for prediction errors (by
7921 : default, the algorithm assigns non-equal, automatically chosen weights to
7922 : errors in the prediction of different components of X). This can be done
7923 : by calling the MCPDSetPredictionWeights() function.
7924 :
7925 : 5. User calls MCPDSolve() function, which takes the algorithm state and
7926 : solves the problem (in this C++ interface no user-supplied callback is
     : needed - see mcpdsolve() below).
7927 :
7928 : 6. User calls MCPDResults() to get the solution; a commented usage sketch
     : after mcpdcreate() below walks through these steps.
7929 :
7930 : INPUT PARAMETERS:
7931 : N - problem dimension, N>=1
7932 :
7933 : OUTPUT PARAMETERS:
7934 : State - structure stores algorithm state
7935 :
7936 : -- ALGLIB --
7937 : Copyright 23.05.2010 by Bochkanov Sergey
7938 : *************************************************************************/
7939 0 : void mcpdcreate(const ae_int_t n, mcpdstate &s, const xparams _xparams)
7940 : {
7941 : jmp_buf _break_jump;
7942 : alglib_impl::ae_state _alglib_env_state;
7943 0 : alglib_impl::ae_state_init(&_alglib_env_state);
7944 0 : if( setjmp(_break_jump) )
7945 : {
7946 : #if !defined(AE_NO_EXCEPTIONS)
7947 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
7948 : #else
7949 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
7950 : return;
7951 : #endif
7952 : }
7953 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
7954 0 : if( _xparams.flags!=0x0 )
7955 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
7956 0 : alglib_impl::mcpdcreate(n, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
7957 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
7958 0 : return;
7959 : }
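     :
     : /*************************************************************************
     : Usage sketch for the six steps above (editorial illustration, not part of
     : the library; assumes a 2-state problem with one hypothetical observed
     : track, and default constraints/weights):
     :
     :     alglib::mcpdstate s;
     :     alglib::mcpdreport rep;
     :     alglib::real_2d_array p;
     :     alglib::real_2d_array track("[[1.00,0.00],[0.95,0.05],[0.9025,0.0975]]");
     :     alglib::mcpdcreate(2, s);        // step 1: create the solver
     :     alglib::mcpdaddtrack(s, track);  // step 2: add the observed states
     :     alglib::mcpdsolve(s);            // step 5: run the optimizer
     :     alglib::mcpdresults(s, p, rep);  // step 6: fetch P and the report
     :     // rep.terminationtype>0 denotes success; p holds the 2x2 matrix P
     : *************************************************************************/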
7960 :
7961 : /*************************************************************************
7962 : DESCRIPTION:
7963 :
7964 : This function is a specialized version of the MCPDCreate() function; we
7965 : recommend that you read the comments on that function for general
7966 : information about the MCPD solver.
7967 :
7968 : This function creates MCPD (Markov Chains for Population Data) solver
7969 : for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
7970 : is modelled as
7971 : X[i+1] = P*X[i]
7972 : where
7973 : X[i] and X[i+1] are N-dimensional state vectors
7974 : P is an N*N transition matrix
7975 : and one selected component of X[] is called the "entry" state and is
7976 : treated in a special way:
7977 : system state always transits from the "entry" state to some other state
7978 : system state cannot transit from any state into the "entry" state
7979 : These conditions basically mean that the row of P which corresponds to
7980 : the "entry" state is zero.
7981 :
7982 : Such models arise when:
7983 : * there is some population of individuals
7984 : * individuals can have different states
7985 : * individuals can transit from one state to another
7986 : * population size is NOT constant - at every moment of time there is some
7987 : (unpredictable) number of "new" individuals, which can transit into one
7988 : of the states at the next turn, but still no one leaves the population
7989 : * you want to model transitions of individuals from one state into another
7990 : * but you do NOT want to predict the number of "new" individuals because it
7991 : does not depend on the individuals already present (hence the system
7992 : cannot transit INTO the entry state - it can only transit FROM it).
7993 :
7994 : This model is discussed in more detail in the ALGLIB User Guide (see
7995 : http://www.alglib.net/dataanalysis/ for more information).
7996 :
7997 : INPUT PARAMETERS:
7998 : N - problem dimension, N>=2
7999 : EntryState- index of entry state, in 0..N-1
8000 :
8001 : OUTPUT PARAMETERS:
8002 : State - structure stores algorithm state
8003 :
8004 : -- ALGLIB --
8005 : Copyright 23.05.2010 by Bochkanov Sergey
8006 : *************************************************************************/
8007 0 : void mcpdcreateentry(const ae_int_t n, const ae_int_t entrystate, mcpdstate &s, const xparams _xparams)
8008 : {
8009 : jmp_buf _break_jump;
8010 : alglib_impl::ae_state _alglib_env_state;
8011 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8012 0 : if( setjmp(_break_jump) )
8013 : {
8014 : #if !defined(AE_NO_EXCEPTIONS)
8015 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8016 : #else
8017 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8018 : return;
8019 : #endif
8020 : }
8021 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8022 0 : if( _xparams.flags!=0x0 )
8023 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8024 0 : alglib_impl::mcpdcreateentry(n, entrystate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
8025 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8026 0 : return;
8027 : }
8028 :
8029 : /*************************************************************************
8030 : DESCRIPTION:
8031 :
8032 : This function is a specialized version of the MCPDCreate() function; we
8033 : recommend that you read the comments on that function for general
8034 : information about the MCPD solver.
8035 :
8036 : This function creates MCPD (Markov Chains for Population Data) solver
8037 : for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
8038 : is modelled as
8039 : X[i+1] = P*X[i]
8040 : where
8041 : X[i] and X[i+1] are N-dimensional state vectors
8042 : P is an N*N transition matrix
8043 : and one selected component of X[] is called the "exit" state and is
8044 : treated in a special way:
8045 : system state can transit from any state into the "exit" state
8046 : system state cannot transit from the "exit" state into any other state
8047 : transition operator discards the "exit" state (makes it zero at each turn)
8048 : These conditions basically mean that the column of P which corresponds to
8049 : the "exit" state is zero. Multiplication by such a P may decrease the sum
8050 : of vector components.
8051 :
8052 : Such models arise when:
8053 : * there is some population of individuals
8054 : * individuals can have different states
8055 : * individuals can transit from one state to another
8056 : * population size is NOT constant - individuals can move into "exit" state
8057 : and leave the population at the next turn, but there are no new individuals
8058 : * the number of individuals which leave the population can be predicted
8059 : * you want to model transitions of individuals from one state into another
8060 : (including transitions into the "exit" state)
8061 :
8062 : This model is discussed in more detail in the ALGLIB User Guide (see
8063 : http://www.alglib.net/dataanalysis/ for more information).
8064 :
8065 : INPUT PARAMETERS:
8066 : N - problem dimension, N>=2
8067 : ExitState- index of exit state, in 0..N-1
8068 :
8069 : OUTPUT PARAMETERS:
8070 : State - structure stores algorithm state
8071 :
8072 : -- ALGLIB --
8073 : Copyright 23.05.2010 by Bochkanov Sergey
8074 : *************************************************************************/
8075 0 : void mcpdcreateexit(const ae_int_t n, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams)
8076 : {
8077 : jmp_buf _break_jump;
8078 : alglib_impl::ae_state _alglib_env_state;
8079 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8080 0 : if( setjmp(_break_jump) )
8081 : {
8082 : #if !defined(AE_NO_EXCEPTIONS)
8083 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8084 : #else
8085 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8086 : return;
8087 : #endif
8088 : }
8089 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8090 0 : if( _xparams.flags!=0x0 )
8091 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8092 0 : alglib_impl::mcpdcreateexit(n, exitstate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
8093 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8094 0 : return;
8095 : }
8096 :
8097 : /*************************************************************************
8098 : DESCRIPTION:
8099 :
8100 : This function is a specialized version of the MCPDCreate() function; we
8101 : recommend that you read the comments on that function for general
8102 : information about the MCPD solver.
8103 :
8104 : This function creates MCPD (Markov Chains for Population Data) solver
8105 : for "Entry-Exit-states" model, i.e. model where transition from X[i] to
8106 : X[i+1] is modelled as
8107 : X[i+1] = P*X[i]
8108 : where
8109 : X[i] and X[i+1] are N-dimensional state vectors
8110 : P is an N*N transition matrix
8111 : one selected component of X[] is called the "entry" state and is treated
8112 : in a special way:
8113 : system state always transits from the "entry" state to some other state
8114 : system state cannot transit from any state into the "entry" state
8115 : and another component of X[] is called the "exit" state and is treated in
8116 : a special way too:
8117 : system state can transit from any state into the "exit" state
8118 : system state cannot transit from the "exit" state into any other state
8119 : transition operator discards the "exit" state (makes it zero at each turn)
8120 : These conditions basically mean that:
8121 : the row of P which corresponds to the "entry" state is zero
8122 : the column of P which corresponds to the "exit" state is zero
8123 : Multiplication by such a P may decrease the sum of vector components (a
     : structural sketch of such a P is given after this function).
8124 :
8125 : Such models arise when:
8126 : * there is some population of individuals
8127 : * individuals can have different states
8128 : * individuals can transit from one state to another
8129 : * population size is NOT constant
8130 : * at every moment of time there is some (unpredictable) number of "new"
8131 : individuals, which can transit into one of the states at the next turn
8132 : * some individuals can move (predictably) into the "exit" state and leave
8133 : the population at the next turn
8134 : * you want to model transitions of individuals from one state into another,
8135 : including transitions from the "entry" state and into the "exit" state.
8136 : * but you do NOT want to predict the number of "new" individuals because it
8137 : does not depend on the individuals already present (hence the system
8138 : cannot transit INTO the entry state - it can only transit FROM it).
8139 :
8140 : This model is discussed in more detail in the ALGLIB User Guide (see
8141 : http://www.alglib.net/dataanalysis/ for more information).
8142 :
8143 : INPUT PARAMETERS:
8144 : N - problem dimension, N>=2
8145 : EntryState- index of entry state, in 0..N-1
8146 : ExitState- index of exit state, in 0..N-1
8147 :
8148 : OUTPUT PARAMETERS:
8149 : State - structure stores algorithm state
8150 :
8151 : -- ALGLIB --
8152 : Copyright 23.05.2010 by Bochkanov Sergey
8153 : *************************************************************************/
8154 0 : void mcpdcreateentryexit(const ae_int_t n, const ae_int_t entrystate, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams)
8155 : {
8156 : jmp_buf _break_jump;
8157 : alglib_impl::ae_state _alglib_env_state;
8158 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8159 0 : if( setjmp(_break_jump) )
8160 : {
8161 : #if !defined(AE_NO_EXCEPTIONS)
8162 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8163 : #else
8164 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8165 : return;
8166 : #endif
8167 : }
8168 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8169 0 : if( _xparams.flags!=0x0 )
8170 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8171 0 : alglib_impl::mcpdcreateentryexit(n, entrystate, exitstate, const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
8172 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8173 0 : return;
8174 : }
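     :
     : /*************************************************************************
     : Editorial illustration (not part of the library): for N=3, EntryState=0
     : and ExitState=2, the conditions above force the following structure on P
     : (x denotes a free non-negative element):
     :
     :         ( 0  0  0 )   <- row 0 is zero: nothing transits INTO "entry"
     :     P = ( x  x  0 )
     :         ( x  x  0 )
     :               ^-- column 2 is zero: nothing transits OUT OF "exit"
     : *************************************************************************/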
8175 :
8176 : /*************************************************************************
8177 : This function is used to add a track - sequence of system states at the
8178 : different moments of its evolution.
8179 :
8180 : You may add one or several tracks to the MCPD solver. In case you have
8181 : several tracks, they won't overwrite each other. For example, if you pass
8182 : two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
8183 : the solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to
8184 : t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks -
8185 : i.e. it won't try to model a transition from t=A+3 to t=B+1.
8186 :
8187 : INPUT PARAMETERS:
8188 : S - solver
8189 : XY - track, array[K,N]:
8190 : * I-th row is a state at t=I
8191 : * elements of XY must be non-negative (exception will be
8192 : thrown on negative elements)
8193 : K - number of points in a track
8194 : * if given, only leading K rows of XY are used
8195 : * if not given, automatically determined from size of XY
8196 :
8197 : NOTES:
8198 :
8199 : 1. Track may contain either proportional or population data:
8200 : * with proportional data all rows of XY must sum to 1.0, i.e. we have
8201 : proportions instead of absolute population values
8202 : * with population data rows of XY contain population counts and generally
8203 : do not sum to 1.0 (although they still must be non-negative)
8204 :
8205 : -- ALGLIB --
8206 : Copyright 23.05.2010 by Bochkanov Sergey
8207 : *************************************************************************/
8208 0 : void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const ae_int_t k, const xparams _xparams)
8209 : {
8210 : jmp_buf _break_jump;
8211 : alglib_impl::ae_state _alglib_env_state;
8212 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8213 0 : if( setjmp(_break_jump) )
8214 : {
8215 : #if !defined(AE_NO_EXCEPTIONS)
8216 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8217 : #else
8218 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8219 : return;
8220 : #endif
8221 : }
8222 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8223 0 : if( _xparams.flags!=0x0 )
8224 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8225 0 : alglib_impl::mcpdaddtrack(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), k, &_alglib_env_state);
8226 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8227 0 : return;
8228 : }
8229 :
8230 : /*************************************************************************
8231 : This function is used to add a track - sequence of system states at the
8232 : different moments of its evolution.
8233 :
8234 : You may add one or several tracks to the MCPD solver. In case you have
8235 : several tracks, they won't overwrite each other. For example, if you pass
8236 : two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
8237 : the solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to
8238 : t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks -
8239 : i.e. it won't try to model a transition from t=A+3 to t=B+1.
8240 :
8241 : INPUT PARAMETERS:
8242 : S - solver
8243 : XY - track, array[K,N]:
8244 : * I-th row is a state at t=I
8245 : * elements of XY must be non-negative (exception will be
8246 : thrown on negative elements)
8247 : K - number of points in a track
8248 : * if given, only leading K rows of XY are used
8249 : * if not given, automatically determined from size of XY
8250 :
8251 : NOTES:
8252 :
8253 : 1. Track may contain either proportional or population data:
8254 : * with proportional data all rows of XY must sum to 1.0, i.e. we have
8255 : proportions instead of absolute population values
8256 : * with population data rows of XY contain population counts and generally
8257 : do not sum to 1.0 (although they still must be non-negative)
8258 :
8259 : -- ALGLIB --
8260 : Copyright 23.05.2010 by Bochkanov Sergey
8261 : *************************************************************************/
8262 : #if !defined(AE_NO_EXCEPTIONS)
8263 0 : void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const xparams _xparams)
8264 : {
8265 : jmp_buf _break_jump;
8266 : alglib_impl::ae_state _alglib_env_state;
8267 : ae_int_t k;
8268 :
8269 0 : k = xy.rows();
8270 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8271 0 : if( setjmp(_break_jump) )
8272 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8273 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8274 0 : if( _xparams.flags!=0x0 )
8275 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8276 0 : alglib_impl::mcpdaddtrack(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), k, &_alglib_env_state);
8277 :
8278 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8279 0 : return;
8280 : }
8281 : #endif
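     :
     : /*************************************************************************
     : Editorial sketch with hypothetical data (s is an mcpdstate created
     : earlier): two independent tracks; the solver models transitions inside
     : each track but never across them:
     :
     :     alglib::real_2d_array a("[[0.9,0.1],[0.8,0.2],[0.7,0.3]]");
     :     alglib::real_2d_array b("[[0.5,0.5],[0.4,0.6]]");
     :     alglib::mcpdaddtrack(s, a);   // models A1->A2 and A2->A3
     :     alglib::mcpdaddtrack(s, b);   // models B1->B2; no A3->B1 mixing
     : *************************************************************************/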
8282 :
8283 : /*************************************************************************
8284 : This function is used to add equality constraints on the elements of the
8285 : transition matrix P.
8286 :
8287 : MCPD solver has four types of constraints which can be placed on P:
8288 : * user-specified equality constraints (optional)
8289 : * user-specified bound constraints (optional)
8290 : * user-specified general linear constraints (optional)
8291 : * basic constraints (always present):
8292 : * non-negativity: P[i,j]>=0
8293 : * consistency: every column of P sums to 1.0
8294 :
8295 : Final constraints which are passed to the underlying optimizer are
8296 : calculated as intersection of all present constraints. For example, you
8297 : may specify boundary constraint on P[0,0] and equality one:
8298 : 0.1<=P[0,0]<=0.9
8299 : P[0,0]=0.5
8300 : Such combination of constraints will be silently reduced to their
8301 : intersection, which is P[0,0]=0.5.
8302 :
8303 : This function can be used to place equality constraints on arbitrary
8304 : subset of elements of P. Set of constraints is specified by EC, which may
8305 : contain either NAN's or finite numbers from [0,1]. NAN denotes absence of
8306 : constraint, finite number denotes equality constraint on specific element
8307 : of P.
8308 :
8309 : You can also use the MCPDAddEC() function, which allows you to ADD an
8310 : equality constraint for one element of P without changing constraints
8311 : for other elements.
8312 :
8313 : These functions (MCPDSetEC and MCPDAddEC) interact as follows:
8314 : * there is internal matrix of equality constraints which is stored in the
8315 : MCPD solver
8316 : * MCPDSetEC() replaces this matrix by another one (SET)
8317 : * MCPDAddEC() modifies one element of this matrix and leaves other ones
8318 : unchanged (ADD)
8319 : * thus MCPDAddEC() call preserves all modifications done by previous
8320 : calls, while MCPDSetEC() completely discards all changes done to the
8321 : equality constraints.
8322 :
8323 : INPUT PARAMETERS:
8324 : S - solver
8325 : EC - equality constraints, array[N,N]. Elements of EC can be
8326 : either NAN's or finite numbers from [0,1]. NAN denotes
8327 : absence of constraints, while finite value denotes
8328 : equality constraint on the corresponding element of P.
8329 :
8330 : NOTES:
8331 :
8332 : 1. infinite values of EC will lead to an exception being thrown. Values
8333 : less than 0.0 or greater than 1.0 will lead to an error code being
8334 : returned after the call to MCPDSolve().
8335 :
8336 : -- ALGLIB --
8337 : Copyright 23.05.2010 by Bochkanov Sergey
8338 : *************************************************************************/
8339 0 : void mcpdsetec(const mcpdstate &s, const real_2d_array &ec, const xparams _xparams)
8340 : {
8341 : jmp_buf _break_jump;
8342 : alglib_impl::ae_state _alglib_env_state;
8343 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8344 0 : if( setjmp(_break_jump) )
8345 : {
8346 : #if !defined(AE_NO_EXCEPTIONS)
8347 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8348 : #else
8349 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8350 : return;
8351 : #endif
8352 : }
8353 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8354 0 : if( _xparams.flags!=0x0 )
8355 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8356 0 : alglib_impl::mcpdsetec(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(ec.c_ptr()), &_alglib_env_state);
8357 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8358 0 : return;
8359 : }
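     :
     : /*************************************************************************
     : Editorial sketch with hypothetical values (s is an mcpdstate created
     : earlier, N=2): fix P[0,0]=0.5 and leave all other elements unconstrained
     : by filling them with NAN:
     :
     :     alglib::real_2d_array ec;
     :     ec.setlength(2, 2);
     :     for(int i=0; i<2; i++)
     :         for(int j=0; j<2; j++)
     :             ec[i][j] = alglib::fp_nan;  // NAN means "no constraint"
     :     ec[0][0] = 0.5;                     // equality constraint on P[0,0]
     :     alglib::mcpdsetec(s, ec);
     :     // equivalent one-element form: alglib::mcpdaddec(s, 0, 0, 0.5);
     : *************************************************************************/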
8360 :
8361 : /*************************************************************************
8362 : This function is used to add equality constraints on the elements of the
8363 : transition matrix P.
8364 :
8365 : MCPD solver has four types of constraints which can be placed on P:
8366 : * user-specified equality constraints (optional)
8367 : * user-specified bound constraints (optional)
8368 : * user-specified general linear constraints (optional)
8369 : * basic constraints (always present):
8370 : * non-negativity: P[i,j]>=0
8371 : * consistency: every column of P sums to 1.0
8372 :
8373 : Final constraints which are passed to the underlying optimizer are
8374 : calculated as intersection of all present constraints. For example, you
8375 : may specify boundary constraint on P[0,0] and equality one:
8376 : 0.1<=P[0,0]<=0.9
8377 : P[0,0]=0.5
8378 : Such combination of constraints will be silently reduced to their
8379 : intersection, which is P[0,0]=0.5.
8380 :
8381 : This function can be used to ADD equality constraint for one element of P
8382 : without changing constraints for other elements.
8383 :
8384 : You can also use the MCPDSetEC() function, which allows you to specify an
8385 : arbitrary set of equality constraints in one call.
8386 :
8387 : These functions (MCPDSetEC and MCPDAddEC) interact as follows:
8388 : * there is internal matrix of equality constraints which is stored in the
8389 : MCPD solver
8390 : * MCPDSetEC() replaces this matrix by another one (SET)
8391 : * MCPDAddEC() modifies one element of this matrix and leaves other ones
8392 : unchanged (ADD)
8393 : * thus MCPDAddEC() call preserves all modifications done by previous
8394 : calls, while MCPDSetEC() completely discards all changes done to the
8395 : equality constraints.
8396 :
8397 : INPUT PARAMETERS:
8398 : S - solver
8399 : I - row index of element being constrained
8400 : J - column index of element being constrained
8401 : C - value (constraint for P[I,J]). Can be either NAN (no
8402 : constraint) or finite value from [0,1].
8403 :
8404 : NOTES:
8405 :
8406 : 1. infinite values of C will lead to an exception being thrown. Values
8407 : less than 0.0 or greater than 1.0 will lead to an error code being
8408 : returned after the call to MCPDSolve().
8409 :
8410 : -- ALGLIB --
8411 : Copyright 23.05.2010 by Bochkanov Sergey
8412 : *************************************************************************/
8413 0 : void mcpdaddec(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double c, const xparams _xparams)
8414 : {
8415 : jmp_buf _break_jump;
8416 : alglib_impl::ae_state _alglib_env_state;
8417 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8418 0 : if( setjmp(_break_jump) )
8419 : {
8420 : #if !defined(AE_NO_EXCEPTIONS)
8421 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8422 : #else
8423 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8424 : return;
8425 : #endif
8426 : }
8427 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8428 0 : if( _xparams.flags!=0x0 )
8429 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8430 0 : alglib_impl::mcpdaddec(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), i, j, c, &_alglib_env_state);
8431 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8432 0 : return;
8433 : }
8434 :
8435 : /*************************************************************************
8436 : This function is used to add bound constraints on the elements of the
8437 : transition matrix P.
8438 :
8439 : MCPD solver has four types of constraints which can be placed on P:
8440 : * user-specified equality constraints (optional)
8441 : * user-specified bound constraints (optional)
8442 : * user-specified general linear constraints (optional)
8443 : * basic constraints (always present):
8444 : * non-negativity: P[i,j]>=0
8445 : * consistency: every column of P sums to 1.0
8446 :
8447 : Final constraints which are passed to the underlying optimizer are
8448 : calculated as intersection of all present constraints. For example, you
8449 : may specify boundary constraint on P[0,0] and equality one:
8450 : 0.1<=P[0,0]<=0.9
8451 : P[0,0]=0.5
8452 : Such combination of constraints will be silently reduced to their
8453 : intersection, which is P[0,0]=0.5.
8454 :
8455 : This function can be used to place bound constraints on arbitrary
8456 : subset of elements of P. Set of constraints is specified by BndL/BndU
8457 : matrices, which may contain arbitrary combination of finite numbers or
8458 : infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
8459 :
8460 : You can also use the MCPDAddBC() function, which allows you to ADD a bound
8461 : constraint for one element of P without changing constraints for other elements.
8462 :
8463 : These functions (MCPDSetBC and MCPDAddBC) interact as follows:
8464 : * there is internal matrix of bound constraints which is stored in the
8465 : MCPD solver
8466 : * MCPDSetBC() replaces this matrix by another one (SET)
8467 : * MCPDAddBC() modifies one element of this matrix and leaves other ones
8468 : unchanged (ADD)
8469 : * thus MCPDAddBC() call preserves all modifications done by previous
8470 : calls, while MCPDSetBC() completely discards all changes done to the
8471 : bound constraints.
8472 :
8473 : INPUT PARAMETERS:
8474 : S - solver
8475 : BndL - lower bounds constraints, array[N,N]. Elements of BndL can
8476 : be finite numbers or -INF.
8477 : BndU - upper bounds constraints, array[N,N]. Elements of BndU can
8478 : be finite numbers or +INF.
8479 :
8480 : -- ALGLIB --
8481 : Copyright 23.05.2010 by Bochkanov Sergey
8482 : *************************************************************************/
8483 0 : void mcpdsetbc(const mcpdstate &s, const real_2d_array &bndl, const real_2d_array &bndu, const xparams _xparams)
8484 : {
8485 : jmp_buf _break_jump;
8486 : alglib_impl::ae_state _alglib_env_state;
8487 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8488 0 : if( setjmp(_break_jump) )
8489 : {
8490 : #if !defined(AE_NO_EXCEPTIONS)
8491 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8492 : #else
8493 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8494 : return;
8495 : #endif
8496 : }
8497 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8498 0 : if( _xparams.flags!=0x0 )
8499 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8500 0 : alglib_impl::mcpdsetbc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(bndl.c_ptr()), const_cast<alglib_impl::ae_matrix*>(bndu.c_ptr()), &_alglib_env_state);
8501 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8502 0 : return;
8503 : }
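     :
     : /*************************************************************************
     : Editorial sketch with hypothetical values (s is an mcpdstate created
     : earlier, N=2): restrict P[0,0] to [0.1,0.9] while keeping the trivial
     : [0,1] bounds on the other elements:
     :
     :     alglib::real_2d_array bndl("[[0.1,0.0],[0.0,0.0]]");
     :     alglib::real_2d_array bndu("[[0.9,1.0],[1.0,1.0]]");
     :     alglib::mcpdsetbc(s, bndl, bndu);
     :     // one-sided bounds use infinities, e.g.
     :     // alglib::mcpdaddbc(s, 1, 1, 0.2, alglib::fp_posinf);
     : *************************************************************************/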
8504 :
8505 : /*************************************************************************
8506 : This function is used to add bound constraints on the elements of the
8507 : transition matrix P.
8508 :
8509 : MCPD solver has four types of constraints which can be placed on P:
8510 : * user-specified equality constraints (optional)
8511 : * user-specified bound constraints (optional)
8512 : * user-specified general linear constraints (optional)
8513 : * basic constraints (always present):
8514 : * non-negativity: P[i,j]>=0
8515 : * consistency: every column of P sums to 1.0
8516 :
8517 : Final constraints which are passed to the underlying optimizer are
8518 : calculated as intersection of all present constraints. For example, you
8519 : may specify boundary constraint on P[0,0] and equality one:
8520 : 0.1<=P[0,0]<=0.9
8521 : P[0,0]=0.5
8522 : Such combination of constraints will be silently reduced to their
8523 : intersection, which is P[0,0]=0.5.
8524 :
8525 : This function can be used to ADD bound constraint for one element of P
8526 : without changing constraints for other elements.
8527 :
8528 : You can also use the MCPDSetBC() function, which allows you to place bound
8529 : constraints on an arbitrary subset of elements of P. The set of constraints
8530 : is specified by BndL/BndU matrices, which may contain arbitrary combinations
8531 : of finite numbers or infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
8532 :
8533 : These functions (MCPDSetBC and MCPDAddBC) interact as follows:
8534 : * there is internal matrix of bound constraints which is stored in the
8535 : MCPD solver
8536 : * MCPDSetBC() replaces this matrix by another one (SET)
8537 : * MCPDAddBC() modifies one element of this matrix and leaves other ones
8538 : unchanged (ADD)
8539 : * thus MCPDAddBC() call preserves all modifications done by previous
8540 : calls, while MCPDSetBC() completely discards all changes done to the
8541 : bound constraints.
8542 :
8543 : INPUT PARAMETERS:
8544 : S - solver
8545 : I - row index of element being constrained
8546 : J - column index of element being constrained
8547 : BndL - lower bound
8548 : BndU - upper bound
8549 :
8550 : -- ALGLIB --
8551 : Copyright 23.05.2010 by Bochkanov Sergey
8552 : *************************************************************************/
8553 0 : void mcpdaddbc(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double bndl, const double bndu, const xparams _xparams)
8554 : {
8555 : jmp_buf _break_jump;
8556 : alglib_impl::ae_state _alglib_env_state;
8557 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8558 0 : if( setjmp(_break_jump) )
8559 : {
8560 : #if !defined(AE_NO_EXCEPTIONS)
8561 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8562 : #else
8563 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8564 : return;
8565 : #endif
8566 : }
8567 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8568 0 : if( _xparams.flags!=0x0 )
8569 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8570 0 : alglib_impl::mcpdaddbc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), i, j, bndl, bndu, &_alglib_env_state);
8571 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8572 0 : return;
8573 : }
8574 :
8575 : /*************************************************************************
8576 : This function is used to set linear equality/inequality constraints on the
8577 : elements of the transition matrix P.
8578 :
8579 : This function can be used to set one or several general linear constraints
8580 : on the elements of P. Two types of constraints are supported:
8581 : * equality constraints
8582 : * inequality constraints (both less-or-equal and greater-or-equal)
8583 :
8584 : Coefficients of constraints are specified by matrix C (one of the
8585 : parameters). One row of C corresponds to one constraint. Because
8586 : transition matrix P has N*N elements, we need N*N columns to store all
8587 : coefficients (they are stored row by row), and one more column to store
8588 : right part - hence C has N*N+1 columns. Constraint kind is stored in the
8589 : CT array.
8590 :
8591 : Thus, I-th linear constraint is
8592 : P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
8593 : + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
8594 : + P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N]
8595 : where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).
8596 :
8597 : Your constraint may involve only some subset of P (less than N*N elements).
8598 : For example, it can be something like
8599 : P[0,0] + P[0,1] = 0.5
8600 : In this case you should still pass a matrix with N*N+1 columns, but all its
8601 : elements (except for C[0,0], C[0,1] and C[0,N*N]) will be zero.
8602 :
8603 : INPUT PARAMETERS:
8604 : S - solver
8605 : C - array[K,N*N+1] - coefficients of constraints
8606 : (see above for complete description)
8607 : CT - array[K] - constraint types
8608 : (see above for complete description)
8609 : K - number of equality/inequality constraints, K>=0:
8610 : * if given, only leading K elements of C/CT are used
8611 : * if not given, automatically determined from sizes of C/CT
8612 :
8613 : -- ALGLIB --
8614 : Copyright 23.05.2010 by Bochkanov Sergey
8615 : *************************************************************************/
8616 0 : void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k, const xparams _xparams)
8617 : {
8618 : jmp_buf _break_jump;
8619 : alglib_impl::ae_state _alglib_env_state;
8620 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8621 0 : if( setjmp(_break_jump) )
8622 : {
8623 : #if !defined(AE_NO_EXCEPTIONS)
8624 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8625 : #else
8626 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8627 : return;
8628 : #endif
8629 : }
8630 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8631 0 : if( _xparams.flags!=0x0 )
8632 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8633 0 : alglib_impl::mcpdsetlc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);
8634 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8635 0 : return;
8636 : }
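     :
     : /*************************************************************************
     : Editorial sketch with hypothetical values (s is an mcpdstate created
     : earlier, N=2): encode the constraint P[0,0]+P[0,1]=0.5 from the comment
     : above. C has N*N+1=5 columns; coefficients are stored row by row and the
     : right part goes into the last column:
     :
     :     alglib::real_2d_array c("[[1.0,1.0,0.0,0.0,0.5]]");
     :     alglib::integer_1d_array ct("[0]");  // CT[0]=0 means "=" constraint
     :     alglib::mcpdsetlc(s, c, ct);
     : *************************************************************************/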
8637 :
8638 : /*************************************************************************
8639 : This function is used to set linear equality/inequality constraints on the
8640 : elements of the transition matrix P.
8641 :
8642 : This function can be used to set one or several general linear constraints
8643 : on the elements of P. Two types of constraints are supported:
8644 : * equality constraints
8645 : * inequality constraints (both less-or-equal and greater-or-equal)
8646 :
8647 : Coefficients of constraints are specified by matrix C (one of the
8648 : parameters). One row of C corresponds to one constraint. Because
8649 : transition matrix P has N*N elements, we need N*N columns to store all
8650 : coefficients (they are stored row by row), and one more column to store
8651 : right part - hence C has N*N+1 columns. Constraint kind is stored in the
8652 : CT array.
8653 :
8654 : Thus, I-th linear constraint is
8655 : P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
8656 : + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
8657 : + P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N]
8658 : where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).
8659 :
8660 : Your constraint may involve only some subset of P (less than N*N elements).
8661 : For example, it can be something like
8662 : P[0,0] + P[0,1] = 0.5
8663 : In this case you should still pass a matrix with N*N+1 columns, but all its
8664 : elements (except for C[0,0], C[0,1] and C[0,N*N]) will be zero.
8665 :
8666 : INPUT PARAMETERS:
8667 : S - solver
8668 : C - array[K,N*N+1] - coefficients of constraints
8669 : (see above for complete description)
8670 : CT - array[K] - constraint types
8671 : (see above for complete description)
8672 : K - number of equality/inequality constraints, K>=0:
8673 : * if given, only leading K elements of C/CT are used
8674 : * if not given, automatically determined from sizes of C/CT
8675 :
8676 : -- ALGLIB --
8677 : Copyright 23.05.2010 by Bochkanov Sergey
8678 : *************************************************************************/
8679 : #if !defined(AE_NO_EXCEPTIONS)
8680 0 : void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const xparams _xparams)
8681 : {
8682 : jmp_buf _break_jump;
8683 : alglib_impl::ae_state _alglib_env_state;
8684 : ae_int_t k;
8685 0 : if( (c.rows()!=ct.length()))
8686 0 : _ALGLIB_CPP_EXCEPTION("Error while calling 'mcpdsetlc': looks like one of arguments has wrong size");
8687 0 : k = c.rows();
8688 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8689 0 : if( setjmp(_break_jump) )
8690 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8691 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8692 0 : if( _xparams.flags!=0x0 )
8693 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8694 0 : alglib_impl::mcpdsetlc(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(ct.c_ptr()), k, &_alglib_env_state);
8695 :
8696 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8697 0 : return;
8698 : }
8699 : #endif
8700 :
8701 : /*************************************************************************
8702 : This function allows you to tune the amount of Tikhonov regularization
8703 : applied to your problem.
8704 :
8705 : By default, the regularizing term is equal to r*||P-prior_P||^2, where r is
8706 : a small non-zero value, P is the transition matrix, prior_P is the identity
8707 : matrix, and ||X||^2 is the sum of squared elements of X.
8708 :
8709 : This function allows you to change the coefficient r. You can also change
8710 : the prior values with the MCPDSetPrior() function.
8711 :
8712 : INPUT PARAMETERS:
8713 : S - solver
8714 : V - regularization coefficient, finite non-negative value. It
8715 : is not recommended to specify zero value unless you are
8716 : pretty sure that you want it.
8717 :
8718 : -- ALGLIB --
8719 : Copyright 23.05.2010 by Bochkanov Sergey
8720 : *************************************************************************/
8721 0 : void mcpdsettikhonovregularizer(const mcpdstate &s, const double v, const xparams _xparams)
8722 : {
8723 : jmp_buf _break_jump;
8724 : alglib_impl::ae_state _alglib_env_state;
8725 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8726 0 : if( setjmp(_break_jump) )
8727 : {
8728 : #if !defined(AE_NO_EXCEPTIONS)
8729 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8730 : #else
8731 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8732 : return;
8733 : #endif
8734 : }
8735 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8736 0 : if( _xparams.flags!=0x0 )
8737 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8738 0 : alglib_impl::mcpdsettikhonovregularizer(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), v, &_alglib_env_state);
8739 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8740 0 : return;
8741 : }
8742 :
8743 : /*************************************************************************
8744 : This function allows you to set the prior values used for regularization
8745 : of your problem.
8746 :
8747 : By default, the regularizing term is equal to r*||P-prior_P||^2, where r is
8748 : a small non-zero value, P is the transition matrix, prior_P is the identity
8749 : matrix, and ||X||^2 is the sum of squared elements of X.
8750 :
8751 : This function allows you to change the prior values prior_P. You can also
8752 : change r with the MCPDSetTikhonovRegularizer() function.
8753 :
8754 : INPUT PARAMETERS:
8755 : S - solver
8756 : PP - array[N,N], matrix of prior values:
8757 : 1. elements must be real numbers from [0,1]
8758 : 2. columns must sum to 1.0.
8759 : The first property is checked (an exception is thrown otherwise),
8760 : while the second one is not checked/enforced.
8761 :
8762 : -- ALGLIB --
8763 : Copyright 23.05.2010 by Bochkanov Sergey
8764 : *************************************************************************/
8765 0 : void mcpdsetprior(const mcpdstate &s, const real_2d_array &pp, const xparams _xparams)
8766 : {
8767 : jmp_buf _break_jump;
8768 : alglib_impl::ae_state _alglib_env_state;
8769 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8770 0 : if( setjmp(_break_jump) )
8771 : {
8772 : #if !defined(AE_NO_EXCEPTIONS)
8773 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8774 : #else
8775 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8776 : return;
8777 : #endif
8778 : }
8779 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8780 0 : if( _xparams.flags!=0x0 )
8781 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8782 0 : alglib_impl::mcpdsetprior(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(pp.c_ptr()), &_alglib_env_state);
8783 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8784 0 : return;
8785 : }
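     :
     : /*************************************************************************
     : Editorial sketch with hypothetical values (s is an mcpdstate created
     : earlier, N=2): pull P towards a custom prior instead of the default
     : identity matrix and set the regularization coefficient r explicitly:
     :
     :     alglib::real_2d_array pp("[[0.8,0.3],[0.2,0.7]]");  // columns sum to 1
     :     alglib::mcpdsetprior(s, pp);                  // prior_P := PP
     :     alglib::mcpdsettikhonovregularizer(s, 0.01);  // r := 0.01
     : *************************************************************************/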
8786 :
8787 : /*************************************************************************
8788 : This function is used to change prediction weights
8789 :
8790 : MCPD solver scales prediction errors as follows
8791 : Error(P) = ||W*(y-P*x)||^2
8792 : where
8793 : x is a system state at time t
8794 : y is a system state at time t+1
8795 : P is a transition matrix
8796 : W is a diagonal scaling matrix
8797 :
8798 : By default, weights are chosen in order to minimize the relative prediction
8799 : error instead of the absolute one. For example, if one component of the
8800 : state is about 0.5 in magnitude and another is about 0.05, then the
8801 : algorithm will make the corresponding weights equal to 2.0 and 20.0.
8802 :
8803 : INPUT PARAMETERS:
8804 : S - solver
8805 : PW - array[N], weights:
8806 : * must be non-negative values (exception will be thrown otherwise)
8807 : * zero values will be replaced by automatically chosen values
8808 :
8809 : -- ALGLIB --
8810 : Copyright 23.05.2010 by Bochkanov Sergey
8811 : *************************************************************************/
8812 0 : void mcpdsetpredictionweights(const mcpdstate &s, const real_1d_array &pw, const xparams _xparams)
8813 : {
8814 : jmp_buf _break_jump;
8815 : alglib_impl::ae_state _alglib_env_state;
8816 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8817 0 : if( setjmp(_break_jump) )
8818 : {
8819 : #if !defined(AE_NO_EXCEPTIONS)
8820 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8821 : #else
8822 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8823 : return;
8824 : #endif
8825 : }
8826 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8827 0 : if( _xparams.flags!=0x0 )
8828 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8829 0 : alglib_impl::mcpdsetpredictionweights(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_vector*>(pw.c_ptr()), &_alglib_env_state);
8830 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8831 0 : return;
8832 : }
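     :
     : /*************************************************************************
     : Editorial sketch (s is an mcpdstate created earlier, N=2): reproduce the
     : default behaviour described above for state components of magnitude about
     : 0.5 and 0.05, i.e. weights proportional to the inverse magnitudes:
     :
     :     alglib::real_1d_array pw("[2.0,20.0]");
     :     alglib::mcpdsetpredictionweights(s, pw);
     :     // a zero entry in PW is replaced by an automatically chosen value
     : *************************************************************************/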
8833 :
8834 : /*************************************************************************
8835 : This function is used to start solution of the MCPD problem.
8836 :
8837 : After return from this function, you can use MCPDResults() to get solution
8838 : and completion code.
8839 :
8840 : -- ALGLIB --
8841 : Copyright 23.05.2010 by Bochkanov Sergey
8842 : *************************************************************************/
8843 0 : void mcpdsolve(const mcpdstate &s, const xparams _xparams)
8844 : {
8845 : jmp_buf _break_jump;
8846 : alglib_impl::ae_state _alglib_env_state;
8847 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8848 0 : if( setjmp(_break_jump) )
8849 : {
8850 : #if !defined(AE_NO_EXCEPTIONS)
8851 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8852 : #else
8853 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8854 : return;
8855 : #endif
8856 : }
8857 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8858 0 : if( _xparams.flags!=0x0 )
8859 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8860 0 : alglib_impl::mcpdsolve(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), &_alglib_env_state);
8861 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8862 0 : return;
8863 : }
8864 :
8865 : /*************************************************************************
8866 : MCPD results
8867 :
8868 : INPUT PARAMETERS:
8869 : State - algorithm state
8870 :
8871 : OUTPUT PARAMETERS:
8872 : P - array[N,N], transition matrix
8873 : Rep - optimization report. You should check Rep.TerminationType
8874 : in order to distinguish successful termination from an
8875 : unsuccessful one. In short, positive values denote
8876 : success, negative ones denote failure.
8877 : More information about fields of this structure can be
8878 : found in the comments on MCPDReport datatype.
8879 :
8880 :
8881 : -- ALGLIB --
8882 : Copyright 23.05.2010 by Bochkanov Sergey
8883 : *************************************************************************/
8884 0 : void mcpdresults(const mcpdstate &s, real_2d_array &p, mcpdreport &rep, const xparams _xparams)
8885 : {
8886 : jmp_buf _break_jump;
8887 : alglib_impl::ae_state _alglib_env_state;
8888 0 : alglib_impl::ae_state_init(&_alglib_env_state);
8889 0 : if( setjmp(_break_jump) )
8890 : {
8891 : #if !defined(AE_NO_EXCEPTIONS)
8892 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
8893 : #else
8894 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
8895 : return;
8896 : #endif
8897 : }
8898 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
8899 0 : if( _xparams.flags!=0x0 )
8900 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
8901 0 : alglib_impl::mcpdresults(const_cast<alglib_impl::mcpdstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(p.c_ptr()), const_cast<alglib_impl::mcpdreport*>(rep.c_ptr()), &_alglib_env_state);
8902 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
8903 0 : return;
8904 : }
8905 : #endif
8906 :
8907 : #if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
8908 : /*************************************************************************
8909 : Neural networks ensemble
8910 : *************************************************************************/
8911 0 : _mlpensemble_owner::_mlpensemble_owner()
8912 : {
8913 : jmp_buf _break_jump;
8914 : alglib_impl::ae_state _state;
8915 :
8916 0 : alglib_impl::ae_state_init(&_state);
8917 0 : if( setjmp(_break_jump) )
8918 : {
8919 0 : if( p_struct!=NULL )
8920 : {
8921 0 : alglib_impl::_mlpensemble_destroy(p_struct);
8922 0 : alglib_impl::ae_free(p_struct);
8923 : }
8924 0 : p_struct = NULL;
8925 : #if !defined(AE_NO_EXCEPTIONS)
8926 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8927 : #else
8928 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8929 : return;
8930 : #endif
8931 : }
8932 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8933 0 : p_struct = NULL;
8934 0 : p_struct = (alglib_impl::mlpensemble*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpensemble), &_state);
8935 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
8936 0 : alglib_impl::_mlpensemble_init(p_struct, &_state, ae_false);
8937 0 : ae_state_clear(&_state);
8938 0 : }
8939 :
8940 0 : _mlpensemble_owner::_mlpensemble_owner(const _mlpensemble_owner &rhs)
8941 : {
8942 : jmp_buf _break_jump;
8943 : alglib_impl::ae_state _state;
8944 :
8945 0 : alglib_impl::ae_state_init(&_state);
8946 0 : if( setjmp(_break_jump) )
8947 : {
8948 0 : if( p_struct!=NULL )
8949 : {
8950 0 : alglib_impl::_mlpensemble_destroy(p_struct);
8951 0 : alglib_impl::ae_free(p_struct);
8952 : }
8953 0 : p_struct = NULL;
8954 : #if !defined(AE_NO_EXCEPTIONS)
8955 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8956 : #else
8957 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8958 : return;
8959 : #endif
8960 : }
8961 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8962 0 : p_struct = NULL;
8963 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpensemble copy constructor failure (source is not initialized)", &_state);
8964 0 : p_struct = (alglib_impl::mlpensemble*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpensemble), &_state);
8965 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
8966 0 : alglib_impl::_mlpensemble_init_copy(p_struct, const_cast<alglib_impl::mlpensemble*>(rhs.p_struct), &_state, ae_false);
8967 0 : ae_state_clear(&_state);
8968 0 : }
8969 :
8970 0 : _mlpensemble_owner& _mlpensemble_owner::operator=(const _mlpensemble_owner &rhs)
8971 : {
8972 0 : if( this==&rhs )
8973 0 : return *this;
8974 : jmp_buf _break_jump;
8975 : alglib_impl::ae_state _state;
8976 :
8977 0 : alglib_impl::ae_state_init(&_state);
8978 0 : if( setjmp(_break_jump) )
8979 : {
8980 : #if !defined(AE_NO_EXCEPTIONS)
8981 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8982 : #else
8983 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8984 : return *this;
8985 : #endif
8986 : }
8987 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8988 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpensemble assignment constructor failure (destination is not initialized)", &_state);
8989 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpensemble assignment constructor failure (source is not initialized)", &_state);
8990 0 : alglib_impl::_mlpensemble_destroy(p_struct);
8991 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpensemble));
8992 0 : alglib_impl::_mlpensemble_init_copy(p_struct, const_cast<alglib_impl::mlpensemble*>(rhs.p_struct), &_state, ae_false);
8993 0 : ae_state_clear(&_state);
8994 0 : return *this;
8995 : }
8996 :
8997 0 : _mlpensemble_owner::~_mlpensemble_owner()
8998 : {
8999 0 : if( p_struct!=NULL )
9000 : {
9001 0 : alglib_impl::_mlpensemble_destroy(p_struct);
9002 0 : ae_free(p_struct);
9003 : }
9004 0 : }
9005 :
9006 0 : alglib_impl::mlpensemble* _mlpensemble_owner::c_ptr()
9007 : {
9008 0 : return p_struct;
9009 : }
9010 :
9011 0 : alglib_impl::mlpensemble* _mlpensemble_owner::c_ptr() const
9012 : {
9013 0 : return const_cast<alglib_impl::mlpensemble*>(p_struct);
9014 : }
9015 0 : mlpensemble::mlpensemble() : _mlpensemble_owner()
9016 : {
9017 0 : }
9018 :
9019 0 : mlpensemble::mlpensemble(const mlpensemble &rhs):_mlpensemble_owner(rhs)
9020 : {
9021 0 : }
9022 :
9023 0 : mlpensemble& mlpensemble::operator=(const mlpensemble &rhs)
9024 : {
9025 0 : if( this==&rhs )
9026 0 : return *this;
9027 0 : _mlpensemble_owner::operator=(rhs);
9028 0 : return *this;
9029 : }
9030 :
9031 0 : mlpensemble::~mlpensemble()
9032 : {
9033 0 : }
9034 :
9035 :
9036 : /*************************************************************************
9037 : This function serializes data structure to string.
9038 :
9039 : Important properties of s_out:
9040 : * it contains alphanumeric characters, dots, underscores, minus signs
9041 : * these symbols are grouped into words, which are separated by spaces
9042 : and Windows-style (CR+LF) newlines
9043 : * although the serializer uses spaces and CR+LF as separators, you can
9044 : replace any separator character by an arbitrary combination of spaces,
9045 : tabs, Windows or Unix newlines. This allows flexible reformatting of
9046 : the string in case you want to include it in a text or XML file.
9047 : But you should not insert separators into the middle of the "words",
9048 : nor should you change the case of letters.
9049 : * s_out can be freely moved between 32-bit and 64-bit systems, little
9050 : and big endian machines, and so on. You can serialize structure on
9051 : 32-bit machine and unserialize it on 64-bit one (or vice versa), or
9052 : serialize it on SPARC and unserialize on x86. You can also
9053 : serialize it in C++ version of ALGLIB and unserialize in C# one,
9054 : and vice versa.
9055 : *************************************************************************/
9056 0 : void mlpeserialize(mlpensemble &obj, std::string &s_out)
9057 : {
9058 : jmp_buf _break_jump;
9059 : alglib_impl::ae_state state;
9060 : alglib_impl::ae_serializer serializer;
9061 : alglib_impl::ae_int_t ssize;
9062 :
9063 0 : alglib_impl::ae_state_init(&state);
9064 0 : if( setjmp(_break_jump) )
9065 : {
9066 : #if !defined(AE_NO_EXCEPTIONS)
9067 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
9068 : #else
9069 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
9070 : return;
9071 : #endif
9072 : }
9073 0 : ae_state_set_break_jump(&state, &_break_jump);
9074 0 : alglib_impl::ae_serializer_init(&serializer);
9075 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
9076 0 : alglib_impl::mlpealloc(&serializer, obj.c_ptr(), &state);
9077 0 : ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
9078 0 : s_out.clear();
9079 0 : s_out.reserve((size_t)(ssize+1));
9080 0 : alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
9081 0 : alglib_impl::mlpeserialize(&serializer, obj.c_ptr(), &state);
9082 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
9083 0 : alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
9084 0 : alglib_impl::ae_serializer_clear(&serializer);
9085 0 : alglib_impl::ae_state_clear(&state);
9086 0 : }
9087 : /*************************************************************************
9088 : This function unserializes data structure from string.
9089 : *************************************************************************/
9090 0 : void mlpeunserialize(const std::string &s_in, mlpensemble &obj)
9091 : {
9092 : jmp_buf _break_jump;
9093 : alglib_impl::ae_state state;
9094 : alglib_impl::ae_serializer serializer;
9095 :
9096 0 : alglib_impl::ae_state_init(&state);
9097 0 : if( setjmp(_break_jump) )
9098 : {
9099 : #if !defined(AE_NO_EXCEPTIONS)
9100 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
9101 : #else
9102 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
9103 : return;
9104 : #endif
9105 : }
9106 0 : ae_state_set_break_jump(&state, &_break_jump);
9107 0 : alglib_impl::ae_serializer_init(&serializer);
9108 0 : alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
9109 0 : alglib_impl::mlpeunserialize(&serializer, obj.c_ptr(), &state);
9110 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
9111 0 : alglib_impl::ae_serializer_clear(&serializer);
9112 0 : alglib_impl::ae_state_clear(&state);
9113 0 : }
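/*************************************************************************
Usage sketch (illustrative only; the function and variable names below
are hypothetical, not part of this file): round-tripping an ensemble
through the string serializer above. Assumes an already constructed
mlpensemble.

    #include "dataanalysis.h"

    void string_roundtrip(alglib::mlpensemble &ens)
    {
        std::string s;
        alglib::mlpeserialize(ens, s);        // portable text form
        alglib::mlpensemble restored;
        alglib::mlpeunserialize(s, restored); // restored is equivalent to ens
    }
*************************************************************************/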
9114 :
9115 :
9116 : /*************************************************************************
9117 : This function serializes data structure to C++ stream.
9118 :
9119 : The data stream generated by this function is the same as the string
9120 : representation generated by the string version of the serializer:
9121 : alphanumeric characters, dots, underscores and minus signs, grouped
9122 : into words separated by spaces and CR+LF.
9123 :
9124 : We recommend that you read the comments on the string version of the
9125 : serializer to find out more about serialization of ALGLIB objects.
9126 : *************************************************************************/
9127 0 : void mlpeserialize(mlpensemble &obj, std::ostream &s_out)
9128 : {
9129 : jmp_buf _break_jump;
9130 : alglib_impl::ae_state state;
9131 : alglib_impl::ae_serializer serializer;
9132 :
9133 0 : alglib_impl::ae_state_init(&state);
9134 0 : if( setjmp(_break_jump) )
9135 : {
9136 : #if !defined(AE_NO_EXCEPTIONS)
9137 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
9138 : #else
9139 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
9140 : return;
9141 : #endif
9142 : }
9143 0 : ae_state_set_break_jump(&state, &_break_jump);
9144 0 : alglib_impl::ae_serializer_init(&serializer);
9145 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
9146 0 : alglib_impl::mlpealloc(&serializer, obj.c_ptr(), &state);
9147 0 : alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
9148 0 : alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
9149 0 : alglib_impl::mlpeserialize(&serializer, obj.c_ptr(), &state);
9150 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
9151 0 : alglib_impl::ae_serializer_clear(&serializer);
9152 0 : alglib_impl::ae_state_clear(&state);
9153 0 : }
9154 : /*************************************************************************
9155 : This function unserializes data structure from stream.
9156 : *************************************************************************/
9157 0 : void mlpeunserialize(const std::istream &s_in, mlpensemble &obj)
9158 : {
9159 : jmp_buf _break_jump;
9160 : alglib_impl::ae_state state;
9161 : alglib_impl::ae_serializer serializer;
9162 :
9163 0 : alglib_impl::ae_state_init(&state);
9164 0 : if( setjmp(_break_jump) )
9165 : {
9166 : #if !defined(AE_NO_EXCEPTIONS)
9167 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
9168 : #else
9169 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
9170 : return;
9171 : #endif
9172 : }
9173 0 : ae_state_set_break_jump(&state, &_break_jump);
9174 0 : alglib_impl::ae_serializer_init(&serializer);
9175 0 : alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
9176 0 : alglib_impl::mlpeunserialize(&serializer, obj.c_ptr(), &state);
9177 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
9178 0 : alglib_impl::ae_serializer_clear(&serializer);
9179 0 : alglib_impl::ae_state_clear(&state);
9180 0 : }
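/*************************************************************************
Usage sketch (illustrative only; names are hypothetical): the stream
serializer round-tripped through a std::stringstream, which binds to
both the std::ostream and std::istream overloads above.

    #include <sstream>
    #include "dataanalysis.h"

    void stream_roundtrip(alglib::mlpensemble &ens)
    {
        std::stringstream ss;
        alglib::mlpeserialize(ens, ss);       // write text form to stream
        alglib::mlpensemble restored;
        alglib::mlpeunserialize(ss, restored);
    }
*************************************************************************/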
9181 :
9182 : /*************************************************************************
9183 : Like MLPCreate0, but for ensembles.
9184 :
9185 : -- ALGLIB --
9186 : Copyright 18.02.2009 by Bochkanov Sergey
9187 : *************************************************************************/
9188 0 : void mlpecreate0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9189 : {
9190 : jmp_buf _break_jump;
9191 : alglib_impl::ae_state _alglib_env_state;
9192 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9193 0 : if( setjmp(_break_jump) )
9194 : {
9195 : #if !defined(AE_NO_EXCEPTIONS)
9196 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9197 : #else
9198 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9199 : return;
9200 : #endif
9201 : }
9202 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9203 0 : if( _xparams.flags!=0x0 )
9204 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9205 0 : alglib_impl::mlpecreate0(nin, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9206 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9207 0 : return;
9208 : }
9209 :
9210 : /*************************************************************************
9211 : Like MLPCreate1, but for ensembles.
9212 :
9213 : -- ALGLIB --
9214 : Copyright 18.02.2009 by Bochkanov Sergey
9215 : *************************************************************************/
9216 0 : void mlpecreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9217 : {
9218 : jmp_buf _break_jump;
9219 : alglib_impl::ae_state _alglib_env_state;
9220 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9221 0 : if( setjmp(_break_jump) )
9222 : {
9223 : #if !defined(AE_NO_EXCEPTIONS)
9224 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9225 : #else
9226 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9227 : return;
9228 : #endif
9229 : }
9230 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9231 0 : if( _xparams.flags!=0x0 )
9232 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9233 0 : alglib_impl::mlpecreate1(nin, nhid, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9234 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9235 0 : return;
9236 : }
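/*************************************************************************
Usage sketch (illustrative only; 'ens' is a hypothetical name): an
ensemble of 5 networks, each with 2 inputs, 10 hidden neurons and 1
linear output. The other mlpecreate* variants follow the same pattern,
taking the geometry arguments of the corresponding MLPCreate*
constructor plus the ensemble size.

    alglib::mlpensemble ens;
    alglib::mlpecreate1(2, 10, 1, 5, ens);
*************************************************************************/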
9237 :
9238 : /*************************************************************************
9239 : Like MLPCreate2, but for ensembles.
9240 :
9241 : -- ALGLIB --
9242 : Copyright 18.02.2009 by Bochkanov Sergey
9243 : *************************************************************************/
9244 0 : void mlpecreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9245 : {
9246 : jmp_buf _break_jump;
9247 : alglib_impl::ae_state _alglib_env_state;
9248 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9249 0 : if( setjmp(_break_jump) )
9250 : {
9251 : #if !defined(AE_NO_EXCEPTIONS)
9252 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9253 : #else
9254 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9255 : return;
9256 : #endif
9257 : }
9258 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9259 0 : if( _xparams.flags!=0x0 )
9260 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9261 0 : alglib_impl::mlpecreate2(nin, nhid1, nhid2, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9262 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9263 0 : return;
9264 : }
9265 :
9266 : /*************************************************************************
9267 : Like MLPCreateB0, but for ensembles.
9268 :
9269 : -- ALGLIB --
9270 : Copyright 18.02.2009 by Bochkanov Sergey
9271 : *************************************************************************/
9272 0 : void mlpecreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9273 : {
9274 : jmp_buf _break_jump;
9275 : alglib_impl::ae_state _alglib_env_state;
9276 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9277 0 : if( setjmp(_break_jump) )
9278 : {
9279 : #if !defined(AE_NO_EXCEPTIONS)
9280 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9281 : #else
9282 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9283 : return;
9284 : #endif
9285 : }
9286 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9287 0 : if( _xparams.flags!=0x0 )
9288 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9289 0 : alglib_impl::mlpecreateb0(nin, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9290 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9291 0 : return;
9292 : }
9293 :
9294 : /*************************************************************************
9295 : Like MLPCreateB1, but for ensembles.
9296 :
9297 : -- ALGLIB --
9298 : Copyright 18.02.2009 by Bochkanov Sergey
9299 : *************************************************************************/
9300 0 : void mlpecreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9301 : {
9302 : jmp_buf _break_jump;
9303 : alglib_impl::ae_state _alglib_env_state;
9304 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9305 0 : if( setjmp(_break_jump) )
9306 : {
9307 : #if !defined(AE_NO_EXCEPTIONS)
9308 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9309 : #else
9310 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9311 : return;
9312 : #endif
9313 : }
9314 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9315 0 : if( _xparams.flags!=0x0 )
9316 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9317 0 : alglib_impl::mlpecreateb1(nin, nhid, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9318 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9319 0 : return;
9320 : }
9321 :
9322 : /*************************************************************************
9323 : Like MLPCreateB2, but for ensembles.
9324 :
9325 : -- ALGLIB --
9326 : Copyright 18.02.2009 by Bochkanov Sergey
9327 : *************************************************************************/
9328 0 : void mlpecreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9329 : {
9330 : jmp_buf _break_jump;
9331 : alglib_impl::ae_state _alglib_env_state;
9332 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9333 0 : if( setjmp(_break_jump) )
9334 : {
9335 : #if !defined(AE_NO_EXCEPTIONS)
9336 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9337 : #else
9338 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9339 : return;
9340 : #endif
9341 : }
9342 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9343 0 : if( _xparams.flags!=0x0 )
9344 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9345 0 : alglib_impl::mlpecreateb2(nin, nhid1, nhid2, nout, b, d, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9346 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9347 0 : return;
9348 : }
9349 :
9350 : /*************************************************************************
9351 : Like MLPCreateR0, but for ensembles.
9352 :
9353 : -- ALGLIB --
9354 : Copyright 18.02.2009 by Bochkanov Sergey
9355 : *************************************************************************/
9356 0 : void mlpecreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9357 : {
9358 : jmp_buf _break_jump;
9359 : alglib_impl::ae_state _alglib_env_state;
9360 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9361 0 : if( setjmp(_break_jump) )
9362 : {
9363 : #if !defined(AE_NO_EXCEPTIONS)
9364 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9365 : #else
9366 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9367 : return;
9368 : #endif
9369 : }
9370 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9371 0 : if( _xparams.flags!=0x0 )
9372 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9373 0 : alglib_impl::mlpecreater0(nin, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9374 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9375 0 : return;
9376 : }
9377 :
9378 : /*************************************************************************
9379 : Like MLPCreateR1, but for ensembles.
9380 :
9381 : -- ALGLIB --
9382 : Copyright 18.02.2009 by Bochkanov Sergey
9383 : *************************************************************************/
9384 0 : void mlpecreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9385 : {
9386 : jmp_buf _break_jump;
9387 : alglib_impl::ae_state _alglib_env_state;
9388 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9389 0 : if( setjmp(_break_jump) )
9390 : {
9391 : #if !defined(AE_NO_EXCEPTIONS)
9392 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9393 : #else
9394 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9395 : return;
9396 : #endif
9397 : }
9398 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9399 0 : if( _xparams.flags!=0x0 )
9400 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9401 0 : alglib_impl::mlpecreater1(nin, nhid, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9402 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9403 0 : return;
9404 : }
9405 :
9406 : /*************************************************************************
9407 : Like MLPCreateR2, but for ensembles.
9408 :
9409 : -- ALGLIB --
9410 : Copyright 18.02.2009 by Bochkanov Sergey
9411 : *************************************************************************/
9412 0 : void mlpecreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9413 : {
9414 : jmp_buf _break_jump;
9415 : alglib_impl::ae_state _alglib_env_state;
9416 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9417 0 : if( setjmp(_break_jump) )
9418 : {
9419 : #if !defined(AE_NO_EXCEPTIONS)
9420 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9421 : #else
9422 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9423 : return;
9424 : #endif
9425 : }
9426 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9427 0 : if( _xparams.flags!=0x0 )
9428 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9429 0 : alglib_impl::mlpecreater2(nin, nhid1, nhid2, nout, a, b, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9430 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9431 0 : return;
9432 : }
9433 :
9434 : /*************************************************************************
9435 : Like MLPCreateC0, but for ensembles.
9436 :
9437 : -- ALGLIB --
9438 : Copyright 18.02.2009 by Bochkanov Sergey
9439 : *************************************************************************/
9440 0 : void mlpecreatec0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9441 : {
9442 : jmp_buf _break_jump;
9443 : alglib_impl::ae_state _alglib_env_state;
9444 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9445 0 : if( setjmp(_break_jump) )
9446 : {
9447 : #if !defined(AE_NO_EXCEPTIONS)
9448 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9449 : #else
9450 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9451 : return;
9452 : #endif
9453 : }
9454 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9455 0 : if( _xparams.flags!=0x0 )
9456 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9457 0 : alglib_impl::mlpecreatec0(nin, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9458 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9459 0 : return;
9460 : }
9461 :
9462 : /*************************************************************************
9463 : Like MLPCreateC1, but for ensembles.
9464 :
9465 : -- ALGLIB --
9466 : Copyright 18.02.2009 by Bochkanov Sergey
9467 : *************************************************************************/
9468 0 : void mlpecreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9469 : {
9470 : jmp_buf _break_jump;
9471 : alglib_impl::ae_state _alglib_env_state;
9472 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9473 0 : if( setjmp(_break_jump) )
9474 : {
9475 : #if !defined(AE_NO_EXCEPTIONS)
9476 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9477 : #else
9478 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9479 : return;
9480 : #endif
9481 : }
9482 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9483 0 : if( _xparams.flags!=0x0 )
9484 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9485 0 : alglib_impl::mlpecreatec1(nin, nhid, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9486 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9487 0 : return;
9488 : }
9489 :
9490 : /*************************************************************************
9491 : Like MLPCreateC2, but for ensembles.
9492 :
9493 : -- ALGLIB --
9494 : Copyright 18.02.2009 by Bochkanov Sergey
9495 : *************************************************************************/
9496 0 : void mlpecreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9497 : {
9498 : jmp_buf _break_jump;
9499 : alglib_impl::ae_state _alglib_env_state;
9500 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9501 0 : if( setjmp(_break_jump) )
9502 : {
9503 : #if !defined(AE_NO_EXCEPTIONS)
9504 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9505 : #else
9506 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9507 : return;
9508 : #endif
9509 : }
9510 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9511 0 : if( _xparams.flags!=0x0 )
9512 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9513 0 : alglib_impl::mlpecreatec2(nin, nhid1, nhid2, nout, ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9514 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9515 0 : return;
9516 : }
9517 :
9518 : /*************************************************************************
9519 : Creates ensemble from network. Only network geometry is copied.
9520 :
9521 : -- ALGLIB --
9522 : Copyright 17.02.2009 by Bochkanov Sergey
9523 : *************************************************************************/
9524 0 : void mlpecreatefromnetwork(const multilayerperceptron &network, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams)
9525 : {
9526 : jmp_buf _break_jump;
9527 : alglib_impl::ae_state _alglib_env_state;
9528 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9529 0 : if( setjmp(_break_jump) )
9530 : {
9531 : #if !defined(AE_NO_EXCEPTIONS)
9532 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9533 : #else
9534 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9535 : return;
9536 : #endif
9537 : }
9538 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9539 0 : if( _xparams.flags!=0x0 )
9540 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9541 0 : alglib_impl::mlpecreatefromnetwork(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), ensemblesize, const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9542 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9543 0 : return;
9544 : }
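/*************************************************************************
Usage sketch (illustrative only; names are hypothetical): cloning the
geometry of an existing network into a 10-member ensemble; mlpcreate1
comes from the MLPBASE interface of this same library.

    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 10, 1, net);   // 2 inputs, 10 hidden, 1 output
    alglib::mlpensemble ens;
    alglib::mlpecreatefromnetwork(net, 10, ens);
*************************************************************************/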
9545 :
9546 : /*************************************************************************
9547 : Randomization of MLP ensemble
9548 :
9549 : -- ALGLIB --
9550 : Copyright 17.02.2009 by Bochkanov Sergey
9551 : *************************************************************************/
9552 0 : void mlperandomize(const mlpensemble &ensemble, const xparams _xparams)
9553 : {
9554 : jmp_buf _break_jump;
9555 : alglib_impl::ae_state _alglib_env_state;
9556 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9557 0 : if( setjmp(_break_jump) )
9558 : {
9559 : #if !defined(AE_NO_EXCEPTIONS)
9560 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9561 : #else
9562 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9563 : return;
9564 : #endif
9565 : }
9566 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9567 0 : if( _xparams.flags!=0x0 )
9568 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9569 0 : alglib_impl::mlperandomize(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9570 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9571 0 : return;
9572 : }
9573 :
9574 : /*************************************************************************
9575 : Return ensemble properties (number of inputs and outputs).
9576 :
9577 : -- ALGLIB --
9578 : Copyright 17.02.2009 by Bochkanov Sergey
9579 : *************************************************************************/
9580 0 : void mlpeproperties(const mlpensemble &ensemble, ae_int_t &nin, ae_int_t &nout, const xparams _xparams)
9581 : {
9582 : jmp_buf _break_jump;
9583 : alglib_impl::ae_state _alglib_env_state;
9584 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9585 0 : if( setjmp(_break_jump) )
9586 : {
9587 : #if !defined(AE_NO_EXCEPTIONS)
9588 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9589 : #else
9590 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9591 : return;
9592 : #endif
9593 : }
9594 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9595 0 : if( _xparams.flags!=0x0 )
9596 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9597 0 : alglib_impl::mlpeproperties(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &nin, &nout, &_alglib_env_state);
9598 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9599 0 : return;
9600 : }
9601 :
9602 : /*************************************************************************
9603 : Return normalization type (whether ensemble is SOFTMAX-normalized or not).
9604 :
9605 : -- ALGLIB --
9606 : Copyright 17.02.2009 by Bochkanov Sergey
9607 : *************************************************************************/
9608 0 : bool mlpeissoftmax(const mlpensemble &ensemble, const xparams _xparams)
9609 : {
9610 : jmp_buf _break_jump;
9611 : alglib_impl::ae_state _alglib_env_state;
9612 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9613 0 : if( setjmp(_break_jump) )
9614 : {
9615 : #if !defined(AE_NO_EXCEPTIONS)
9616 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9617 : #else
9618 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9619 : return 0;
9620 : #endif
9621 : }
9622 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9623 0 : if( _xparams.flags!=0x0 )
9624 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9625 0 : ae_bool result = alglib_impl::mlpeissoftmax(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), &_alglib_env_state);
9626 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9627 0 : return *(reinterpret_cast<bool*>(&result));
9628 : }
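/*************************************************************************
Usage sketch (illustrative only; 'ens' is an assumed, already
constructed ensemble): querying geometry and the normalization flag.

    alglib::ae_int_t nin, nout;
    alglib::mlpeproperties(ens, nin, nout);
    bool is_classifier = alglib::mlpeissoftmax(ens);
*************************************************************************/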
9629 :
9630 : /*************************************************************************
9631 : Processing
9632 :
9633 : INPUT PARAMETERS:
9634 : Ensemble- neural networks ensemble
9635 : X - input vector, array[0..NIn-1].
9636 : Y - (possibly) preallocated buffer; if size of Y is less than
9637 : NOut, it will be reallocated. If it is large enough, it
9638 : is NOT reallocated, so we can save some time on reallocation.
9639 :
9640 :
9641 : OUTPUT PARAMETERS:
9642 : Y - result. Regression estimate when solving regression task,
9643 : vector of posterior probabilities for classification task.
9644 :
9645 : -- ALGLIB --
9646 : Copyright 17.02.2009 by Bochkanov Sergey
9647 : *************************************************************************/
9648 0 : void mlpeprocess(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
9649 : {
9650 : jmp_buf _break_jump;
9651 : alglib_impl::ae_state _alglib_env_state;
9652 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9653 0 : if( setjmp(_break_jump) )
9654 : {
9655 : #if !defined(AE_NO_EXCEPTIONS)
9656 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9657 : #else
9658 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9659 : return;
9660 : #endif
9661 : }
9662 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9663 0 : if( _xparams.flags!=0x0 )
9664 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9665 0 : alglib_impl::mlpeprocess(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
9666 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9667 0 : return;
9668 : }
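/*************************************************************************
Usage sketch (illustrative only; assumes a trained 2-input, 1-output
ensemble 'ens'): the same preallocated Y can be reused across calls to
avoid repeated reallocation.

    alglib::real_1d_array x = "[0.1, 0.7]";
    alglib::real_1d_array y;              // (re)allocated on demand
    alglib::mlpeprocess(ens, x, y);
    double estimate = y[0];
*************************************************************************/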
9669 :
9670 : /*************************************************************************
9671 : 'interactive' variant of MLPEProcess for languages like Python which
9672 : support constructs like "Y = MLPEProcess(LM,X)" and interactive mode of the
9673 : interpreter
9674 :
9675 : This function allocates a new array on each call, so it is significantly
9676 : slower than its 'non-interactive' counterpart, but it is more convenient
9677 : when you call it from the command line.
9678 :
9679 : -- ALGLIB --
9680 : Copyright 17.02.2009 by Bochkanov Sergey
9681 : *************************************************************************/
9682 0 : void mlpeprocessi(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
9683 : {
9684 : jmp_buf _break_jump;
9685 : alglib_impl::ae_state _alglib_env_state;
9686 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9687 0 : if( setjmp(_break_jump) )
9688 : {
9689 : #if !defined(AE_NO_EXCEPTIONS)
9690 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9691 : #else
9692 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9693 : return;
9694 : #endif
9695 : }
9696 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9697 0 : if( _xparams.flags!=0x0 )
9698 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9699 0 : alglib_impl::mlpeprocessi(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
9700 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9701 0 : return;
9702 : }
9703 :
9704 : /*************************************************************************
9705 : Relative classification error on the test set
9706 :
9707 : INPUT PARAMETERS:
9708 : Ensemble- ensemble
9709 : XY - test set
9710 : NPoints - test set size
9711 :
9712 : RESULT:
9713 : percent of incorrectly classified cases.
9714 : Works both for classifier networks and for regression networks which
9715 : are used as classifiers.
9716 :
9717 : -- ALGLIB --
9718 : Copyright 17.02.2009 by Bochkanov Sergey
9719 : *************************************************************************/
9720 0 : double mlperelclserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
9721 : {
9722 : jmp_buf _break_jump;
9723 : alglib_impl::ae_state _alglib_env_state;
9724 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9725 0 : if( setjmp(_break_jump) )
9726 : {
9727 : #if !defined(AE_NO_EXCEPTIONS)
9728 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9729 : #else
9730 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9731 : return 0;
9732 : #endif
9733 : }
9734 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9735 0 : if( _xparams.flags!=0x0 )
9736 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9737 0 : double result = alglib_impl::mlperelclserror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
9738 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9739 0 : return *(reinterpret_cast<double*>(&result));
9740 : }
9741 :
9742 : /*************************************************************************
9743 : Average cross-entropy (in bits per element) on the test set
9744 :
9745 : INPUT PARAMETERS:
9746 : Ensemble- ensemble
9747 : XY - test set
9748 : NPoints - test set size
9749 :
9750 : RESULT:
9751 : CrossEntropy/(NPoints*LN(2)).
9752 : Zero if ensemble solves regression task.
9753 :
9754 : -- ALGLIB --
9755 : Copyright 17.02.2009 by Bochkanov Sergey
9756 : *************************************************************************/
9757 0 : double mlpeavgce(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
9758 : {
9759 : jmp_buf _break_jump;
9760 : alglib_impl::ae_state _alglib_env_state;
9761 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9762 0 : if( setjmp(_break_jump) )
9763 : {
9764 : #if !defined(AE_NO_EXCEPTIONS)
9765 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9766 : #else
9767 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9768 : return 0;
9769 : #endif
9770 : }
9771 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9772 0 : if( _xparams.flags!=0x0 )
9773 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9774 0 : double result = alglib_impl::mlpeavgce(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
9775 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9776 0 : return *(reinterpret_cast<double*>(&result));
9777 : }
9778 :
9779 : /*************************************************************************
9780 : RMS error on the test set
9781 :
9782 : INPUT PARAMETERS:
9783 : Ensemble- ensemble
9784 : XY - test set
9785 : NPoints - test set size
9786 :
9787 : RESULT:
9788 : root mean square error.
9789 : Its meaning for regression tasks is obvious. For classification tasks,
9790 : RMS error means the error in estimating posterior probabilities.
9791 :
9792 : -- ALGLIB --
9793 : Copyright 17.02.2009 by Bochkanov Sergey
9794 : *************************************************************************/
9795 0 : double mlpermserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
9796 : {
9797 : jmp_buf _break_jump;
9798 : alglib_impl::ae_state _alglib_env_state;
9799 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9800 0 : if( setjmp(_break_jump) )
9801 : {
9802 : #if !defined(AE_NO_EXCEPTIONS)
9803 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9804 : #else
9805 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9806 : return 0;
9807 : #endif
9808 : }
9809 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9810 0 : if( _xparams.flags!=0x0 )
9811 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9812 0 : double result = alglib_impl::mlpermserror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
9813 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9814 0 : return *(reinterpret_cast<double*>(&result));
9815 : }
9816 :
9817 : /*************************************************************************
9818 : Average error on the test set
9819 :
9820 : INPUT PARAMETERS:
9821 : Ensemble- ensemble
9822 : XY - test set
9823 : NPoints - test set size
9824 :
9825 : RESULT:
9826 : Its meaning for regression tasks is obvious. For classification tasks,
9827 : it means the average error in estimating posterior probabilities.
9828 :
9829 : -- ALGLIB --
9830 : Copyright 17.02.2009 by Bochkanov Sergey
9831 : *************************************************************************/
9832 0 : double mlpeavgerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
9833 : {
9834 : jmp_buf _break_jump;
9835 : alglib_impl::ae_state _alglib_env_state;
9836 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9837 0 : if( setjmp(_break_jump) )
9838 : {
9839 : #if !defined(AE_NO_EXCEPTIONS)
9840 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9841 : #else
9842 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9843 : return 0;
9844 : #endif
9845 : }
9846 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9847 0 : if( _xparams.flags!=0x0 )
9848 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9849 0 : double result = alglib_impl::mlpeavgerror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
9850 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9851 0 : return *(reinterpret_cast<double*>(&result));
9852 : }
9853 :
9854 : /*************************************************************************
9855 : Average relative error on the test set
9856 :
9857 : INPUT PARAMETERS:
9858 : Ensemble- ensemble
9859 : XY - test set
9860 : NPoints - test set size
9861 :
9862 : RESULT:
9863 : Its meaning for regression tasks is obvious. For classification tasks,
9864 : it means the average relative error in estimating posterior probabilities.
9865 :
9866 : -- ALGLIB --
9867 : Copyright 17.02.2009 by Bochkanov Sergey
9868 : *************************************************************************/
9869 0 : double mlpeavgrelerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
9870 : {
9871 : jmp_buf _break_jump;
9872 : alglib_impl::ae_state _alglib_env_state;
9873 0 : alglib_impl::ae_state_init(&_alglib_env_state);
9874 0 : if( setjmp(_break_jump) )
9875 : {
9876 : #if !defined(AE_NO_EXCEPTIONS)
9877 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
9878 : #else
9879 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
9880 : return 0;
9881 : #endif
9882 : }
9883 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
9884 0 : if( _xparams.flags!=0x0 )
9885 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
9886 0 : double result = alglib_impl::mlpeavgrelerror(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
9887 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
9888 0 : return *(reinterpret_cast<double*>(&result));
9889 : }
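/*************************************************************************
Usage sketch (illustrative only; names are hypothetical): evaluating a
trained regression ensemble on a test set whose rows are laid out as
[input0, ..., inputN-1, target].

    #include <cstdio>
    #include "dataanalysis.h"

    void report_errors(const alglib::mlpensemble &ens,
                       const alglib::real_2d_array &xy,
                       alglib::ae_int_t npoints)
    {
        double rms  = alglib::mlpermserror(ens, xy, npoints);
        double avg  = alglib::mlpeavgerror(ens, xy, npoints);
        double arel = alglib::mlpeavgrelerror(ens, xy, npoints);
        std::printf("rms=%.4f avg=%.4f avgrel=%.4f\n", rms, avg, arel);
        // classifiers would also look at mlperelclserror() and mlpeavgce()
    }
*************************************************************************/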
9890 : #endif
9891 :
9892 : #if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
9893 : /*************************************************************************
9894 : Training report:
9895 : * RelCLSError - fraction of misclassified cases.
9896 : * AvgCE - average cross-entropy
9897 : * RMSError - root-mean-square error
9898 : * AvgError - average error
9899 : * AvgRelError - average relative error
9900 : * NGrad - number of gradient calculations
9901 : * NHess - number of Hessian calculations
9902 : * NCholesky - number of Cholesky decompositions
9903 :
9904 : NOTE 1: RelCLSError/AvgCE are zero on regression problems.
9905 :
9906 : NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
9907 : errors in prediction of posterior probabilities
9908 : *************************************************************************/
9909 0 : _mlpreport_owner::_mlpreport_owner()
9910 : {
9911 : jmp_buf _break_jump;
9912 : alglib_impl::ae_state _state;
9913 :
9914 0 : alglib_impl::ae_state_init(&_state);
9915 0 : if( setjmp(_break_jump) )
9916 : {
9917 0 : if( p_struct!=NULL )
9918 : {
9919 0 : alglib_impl::_mlpreport_destroy(p_struct);
9920 0 : alglib_impl::ae_free(p_struct);
9921 : }
9922 0 : p_struct = NULL;
9923 : #if !defined(AE_NO_EXCEPTIONS)
9924 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
9925 : #else
9926 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
9927 : return;
9928 : #endif
9929 : }
9930 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
9931 0 : p_struct = NULL;
9932 0 : p_struct = (alglib_impl::mlpreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpreport), &_state);
9933 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
9934 0 : alglib_impl::_mlpreport_init(p_struct, &_state, ae_false);
9935 0 : ae_state_clear(&_state);
9936 0 : }
9937 :
9938 0 : _mlpreport_owner::_mlpreport_owner(const _mlpreport_owner &rhs)
9939 : {
9940 : jmp_buf _break_jump;
9941 : alglib_impl::ae_state _state;
9942 :
9943 0 : alglib_impl::ae_state_init(&_state);
9944 0 : if( setjmp(_break_jump) )
9945 : {
9946 0 : if( p_struct!=NULL )
9947 : {
9948 0 : alglib_impl::_mlpreport_destroy(p_struct);
9949 0 : alglib_impl::ae_free(p_struct);
9950 : }
9951 0 : p_struct = NULL;
9952 : #if !defined(AE_NO_EXCEPTIONS)
9953 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
9954 : #else
9955 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
9956 : return;
9957 : #endif
9958 : }
9959 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
9960 0 : p_struct = NULL;
9961 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpreport copy constructor failure (source is not initialized)", &_state);
9962 0 : p_struct = (alglib_impl::mlpreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpreport), &_state);
9963 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
9964 0 : alglib_impl::_mlpreport_init_copy(p_struct, const_cast<alglib_impl::mlpreport*>(rhs.p_struct), &_state, ae_false);
9965 0 : ae_state_clear(&_state);
9966 0 : }
9967 :
9968 0 : _mlpreport_owner& _mlpreport_owner::operator=(const _mlpreport_owner &rhs)
9969 : {
9970 0 : if( this==&rhs )
9971 0 : return *this;
9972 : jmp_buf _break_jump;
9973 : alglib_impl::ae_state _state;
9974 :
9975 0 : alglib_impl::ae_state_init(&_state);
9976 0 : if( setjmp(_break_jump) )
9977 : {
9978 : #if !defined(AE_NO_EXCEPTIONS)
9979 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
9980 : #else
9981 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
9982 : return *this;
9983 : #endif
9984 : }
9985 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
9986 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpreport assignment constructor failure (destination is not initialized)", &_state);
9987 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpreport assignment constructor failure (source is not initialized)", &_state);
9988 0 : alglib_impl::_mlpreport_destroy(p_struct);
9989 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpreport));
9990 0 : alglib_impl::_mlpreport_init_copy(p_struct, const_cast<alglib_impl::mlpreport*>(rhs.p_struct), &_state, ae_false);
9991 0 : ae_state_clear(&_state);
9992 0 : return *this;
9993 : }
9994 :
9995 0 : _mlpreport_owner::~_mlpreport_owner()
9996 : {
9997 0 : if( p_struct!=NULL )
9998 : {
9999 0 : alglib_impl::_mlpreport_destroy(p_struct);
10000 0 : ae_free(p_struct);
10001 : }
10002 0 : }
10003 :
10004 0 : alglib_impl::mlpreport* _mlpreport_owner::c_ptr()
10005 : {
10006 0 : return p_struct;
10007 : }
10008 :
10009 0 : alglib_impl::mlpreport* _mlpreport_owner::c_ptr() const
10010 : {
10011 0 : return const_cast<alglib_impl::mlpreport*>(p_struct);
10012 : }
10013 0 : mlpreport::mlpreport() : _mlpreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
10014 : {
10015 0 : }
10016 :
10017 0 : mlpreport::mlpreport(const mlpreport &rhs):_mlpreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),ngrad(p_struct->ngrad),nhess(p_struct->nhess),ncholesky(p_struct->ncholesky)
10018 : {
10019 0 : }
10020 :
10021 0 : mlpreport& mlpreport::operator=(const mlpreport &rhs)
10022 : {
10023 0 : if( this==&rhs )
10024 0 : return *this;
10025 0 : _mlpreport_owner::operator=(rhs);
10026 0 : return *this;
10027 : }
10028 :
10029 0 : mlpreport::~mlpreport()
10030 : {
10031 0 : }
10032 :
10033 :
10034 : /*************************************************************************
10035 : Cross-validation estimates of generalization error
10036 : *************************************************************************/
10037 0 : _mlpcvreport_owner::_mlpcvreport_owner()
10038 : {
10039 : jmp_buf _break_jump;
10040 : alglib_impl::ae_state _state;
10041 :
10042 0 : alglib_impl::ae_state_init(&_state);
10043 0 : if( setjmp(_break_jump) )
10044 : {
10045 0 : if( p_struct!=NULL )
10046 : {
10047 0 : alglib_impl::_mlpcvreport_destroy(p_struct);
10048 0 : alglib_impl::ae_free(p_struct);
10049 : }
10050 0 : p_struct = NULL;
10051 : #if !defined(AE_NO_EXCEPTIONS)
10052 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10053 : #else
10054 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10055 : return;
10056 : #endif
10057 : }
10058 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10059 0 : p_struct = NULL;
10060 0 : p_struct = (alglib_impl::mlpcvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpcvreport), &_state);
10061 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
10062 0 : alglib_impl::_mlpcvreport_init(p_struct, &_state, ae_false);
10063 0 : ae_state_clear(&_state);
10064 0 : }
10065 :
10066 0 : _mlpcvreport_owner::_mlpcvreport_owner(const _mlpcvreport_owner &rhs)
10067 : {
10068 : jmp_buf _break_jump;
10069 : alglib_impl::ae_state _state;
10070 :
10071 0 : alglib_impl::ae_state_init(&_state);
10072 0 : if( setjmp(_break_jump) )
10073 : {
10074 0 : if( p_struct!=NULL )
10075 : {
10076 0 : alglib_impl::_mlpcvreport_destroy(p_struct);
10077 0 : alglib_impl::ae_free(p_struct);
10078 : }
10079 0 : p_struct = NULL;
10080 : #if !defined(AE_NO_EXCEPTIONS)
10081 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10082 : #else
10083 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10084 : return;
10085 : #endif
10086 : }
10087 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10088 0 : p_struct = NULL;
10089 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpcvreport copy constructor failure (source is not initialized)", &_state);
10090 0 : p_struct = (alglib_impl::mlpcvreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlpcvreport), &_state);
10091 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
10092 0 : alglib_impl::_mlpcvreport_init_copy(p_struct, const_cast<alglib_impl::mlpcvreport*>(rhs.p_struct), &_state, ae_false);
10093 0 : ae_state_clear(&_state);
10094 0 : }
10095 :
10096 0 : _mlpcvreport_owner& _mlpcvreport_owner::operator=(const _mlpcvreport_owner &rhs)
10097 : {
10098 0 : if( this==&rhs )
10099 0 : return *this;
10100 : jmp_buf _break_jump;
10101 : alglib_impl::ae_state _state;
10102 :
10103 0 : alglib_impl::ae_state_init(&_state);
10104 0 : if( setjmp(_break_jump) )
10105 : {
10106 : #if !defined(AE_NO_EXCEPTIONS)
10107 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10108 : #else
10109 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10110 : return *this;
10111 : #endif
10112 : }
10113 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10114 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlpcvreport assignment constructor failure (destination is not initialized)", &_state);
10115 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlpcvreport assignment constructor failure (source is not initialized)", &_state);
10116 0 : alglib_impl::_mlpcvreport_destroy(p_struct);
10117 0 : memset(p_struct, 0, sizeof(alglib_impl::mlpcvreport));
10118 0 : alglib_impl::_mlpcvreport_init_copy(p_struct, const_cast<alglib_impl::mlpcvreport*>(rhs.p_struct), &_state, ae_false);
10119 0 : ae_state_clear(&_state);
10120 0 : return *this;
10121 : }
10122 :
10123 0 : _mlpcvreport_owner::~_mlpcvreport_owner()
10124 : {
10125 0 : if( p_struct!=NULL )
10126 : {
10127 0 : alglib_impl::_mlpcvreport_destroy(p_struct);
10128 0 : ae_free(p_struct);
10129 : }
10130 0 : }
10131 :
10132 0 : alglib_impl::mlpcvreport* _mlpcvreport_owner::c_ptr()
10133 : {
10134 0 : return p_struct;
10135 : }
10136 :
10137 0 : alglib_impl::mlpcvreport* _mlpcvreport_owner::c_ptr() const
10138 : {
10139 0 : return const_cast<alglib_impl::mlpcvreport*>(p_struct);
10140 : }
10141 0 : mlpcvreport::mlpcvreport() : _mlpcvreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
10142 : {
10143 0 : }
10144 :
10145 0 : mlpcvreport::mlpcvreport(const mlpcvreport &rhs):_mlpcvreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
10146 : {
10147 0 : }
10148 :
10149 0 : mlpcvreport& mlpcvreport::operator=(const mlpcvreport &rhs)
10150 : {
10151 0 : if( this==&rhs )
10152 0 : return *this;
10153 0 : _mlpcvreport_owner::operator=(rhs);
10154 0 : return *this;
10155 : }
10156 :
10157 0 : mlpcvreport::~mlpcvreport()
10158 : {
10159 0 : }
10160 :
10161 :
10162 : /*************************************************************************
10163 : Trainer object for neural network.
10164 :
10165 : You should not try to access fields of this object directly - use ALGLIB
10166 : functions to work with this object.
10167 : *************************************************************************/
10168 0 : _mlptrainer_owner::_mlptrainer_owner()
10169 : {
10170 : jmp_buf _break_jump;
10171 : alglib_impl::ae_state _state;
10172 :
10173 0 : alglib_impl::ae_state_init(&_state);
10174 0 : if( setjmp(_break_jump) )
10175 : {
10176 0 : if( p_struct!=NULL )
10177 : {
10178 0 : alglib_impl::_mlptrainer_destroy(p_struct);
10179 0 : alglib_impl::ae_free(p_struct);
10180 : }
10181 0 : p_struct = NULL;
10182 : #if !defined(AE_NO_EXCEPTIONS)
10183 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10184 : #else
10185 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10186 : return;
10187 : #endif
10188 : }
10189 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10190 0 : p_struct = NULL;
10191 0 : p_struct = (alglib_impl::mlptrainer*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlptrainer), &_state);
10192 0 : memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
10193 0 : alglib_impl::_mlptrainer_init(p_struct, &_state, ae_false);
10194 0 : ae_state_clear(&_state);
10195 0 : }
10196 :
10197 0 : _mlptrainer_owner::_mlptrainer_owner(const _mlptrainer_owner &rhs)
10198 : {
10199 : jmp_buf _break_jump;
10200 : alglib_impl::ae_state _state;
10201 :
10202 0 : alglib_impl::ae_state_init(&_state);
10203 0 : if( setjmp(_break_jump) )
10204 : {
10205 0 : if( p_struct!=NULL )
10206 : {
10207 0 : alglib_impl::_mlptrainer_destroy(p_struct);
10208 0 : alglib_impl::ae_free(p_struct);
10209 : }
10210 0 : p_struct = NULL;
10211 : #if !defined(AE_NO_EXCEPTIONS)
10212 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10213 : #else
10214 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10215 : return;
10216 : #endif
10217 : }
10218 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10219 0 : p_struct = NULL;
10220 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlptrainer copy constructor failure (source is not initialized)", &_state);
10221 0 : p_struct = (alglib_impl::mlptrainer*)alglib_impl::ae_malloc(sizeof(alglib_impl::mlptrainer), &_state);
10222 0 : memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
10223 0 : alglib_impl::_mlptrainer_init_copy(p_struct, const_cast<alglib_impl::mlptrainer*>(rhs.p_struct), &_state, ae_false);
10224 0 : ae_state_clear(&_state);
10225 0 : }
10226 :
10227 0 : _mlptrainer_owner& _mlptrainer_owner::operator=(const _mlptrainer_owner &rhs)
10228 : {
10229 0 : if( this==&rhs )
10230 0 : return *this;
10231 : jmp_buf _break_jump;
10232 : alglib_impl::ae_state _state;
10233 :
10234 0 : alglib_impl::ae_state_init(&_state);
10235 0 : if( setjmp(_break_jump) )
10236 : {
10237 : #if !defined(AE_NO_EXCEPTIONS)
10238 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
10239 : #else
10240 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
10241 : return *this;
10242 : #endif
10243 : }
10244 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
10245 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: mlptrainer assignment constructor failure (destination is not initialized)", &_state);
10246 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: mlptrainer assignment constructor failure (source is not initialized)", &_state);
10247 0 : alglib_impl::_mlptrainer_destroy(p_struct);
10248 0 : memset(p_struct, 0, sizeof(alglib_impl::mlptrainer));
10249 0 : alglib_impl::_mlptrainer_init_copy(p_struct, const_cast<alglib_impl::mlptrainer*>(rhs.p_struct), &_state, ae_false);
10250 0 : ae_state_clear(&_state);
10251 0 : return *this;
10252 : }
10253 :
10254 0 : _mlptrainer_owner::~_mlptrainer_owner()
10255 : {
10256 0 : if( p_struct!=NULL )
10257 : {
10258 0 : alglib_impl::_mlptrainer_destroy(p_struct);
10259 0 : ae_free(p_struct);
10260 : }
10261 0 : }
10262 :
10263 0 : alglib_impl::mlptrainer* _mlptrainer_owner::c_ptr()
10264 : {
10265 0 : return p_struct;
10266 : }
10267 :
10268 0 : alglib_impl::mlptrainer* _mlptrainer_owner::c_ptr() const
10269 : {
10270 0 : return const_cast<alglib_impl::mlptrainer*>(p_struct);
10271 : }
10272 0 : mlptrainer::mlptrainer() : _mlptrainer_owner()
10273 : {
10274 0 : }
10275 :
10276 0 : mlptrainer::mlptrainer(const mlptrainer &rhs):_mlptrainer_owner(rhs)
10277 : {
10278 0 : }
10279 :
10280 0 : mlptrainer& mlptrainer::operator=(const mlptrainer &rhs)
10281 : {
10282 0 : if( this==&rhs )
10283 0 : return *this;
10284 0 : _mlptrainer_owner::operator=(rhs);
10285 0 : return *this;
10286 : }
10287 :
10288 0 : mlptrainer::~mlptrainer()
10289 : {
10290 0 : }
10291 :
10292 : /*************************************************************************
10293 : Neural network training using modified Levenberg-Marquardt with exact
10294 : Hessian calculation and regularization. Subroutine trains neural network
10295 : with restarts from random positions. Algorithm is well suited for small
10296 : and medium scale problems (hundreds of weights).
10297 :
10298 : INPUT PARAMETERS:
10299 : Network - neural network with initialized geometry
10300 : XY - training set
10301 : NPoints - training set size
10302 : Decay - weight decay constant, >=0.001
10303 : Decay term 'Decay*||Weights||^2' is added to error
10304 : function.
10305 : If you don't know what Decay to choose, use 0.001.
10306 : Restarts - number of restarts from random position, >0.
10307 : If you don't know what Restarts to choose, use 2.
10308 :
10309 : OUTPUT PARAMETERS:
10310 : Network - trained neural network.
10311 : Info - return code:
10312 : * -9, if internal matrix inverse subroutine failed
10313 : * -2, if there is a point with class number
10314 : outside of [0..NOut-1].
10315 : * -1, if wrong parameters specified
10316 : (NPoints<0, Restarts<1).
10317 : * 2, if task has been solved.
10318 : Rep - training report
10319 :
10320 : -- ALGLIB --
10321 : Copyright 10.03.2009 by Bochkanov Sergey
10322 : *************************************************************************/
10323 0 : void mlptrainlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
10324 : {
10325 : jmp_buf _break_jump;
10326 : alglib_impl::ae_state _alglib_env_state;
10327 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10328 0 : if( setjmp(_break_jump) )
10329 : {
10330 : #if !defined(AE_NO_EXCEPTIONS)
10331 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10332 : #else
10333 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10334 : return;
10335 : #endif
10336 : }
10337 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10338 0 : if( _xparams.flags!=0x0 )
10339 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10340 0 : alglib_impl::mlptrainlm(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
10341 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10342 0 : return;
10343 : }
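/*************************************************************************
Usage sketch (illustrative only; names are hypothetical): training a
small regression network with the recommended Decay=0.001 and
Restarts=2.

    #include "dataanalysis.h"

    void train_lm(alglib::multilayerperceptron &net,
                  const alglib::real_2d_array &xy,
                  alglib::ae_int_t npoints)
    {
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlptrainlm(net, xy, npoints, 0.001, 2, info, rep);
        // info==2 on success; rep.rmserror etc. describe the fit
    }
*************************************************************************/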
10344 :
10345 : /*************************************************************************
10346 : Neural network training using L-BFGS algorithm with regularization.
10347 : Subroutine trains neural network with restarts from random positions.
10348 : Algorithm is well suited for problems of any dimensionality (memory
10349 : requirements and per-step complexity are linear in the number of weights).
10350 :
10351 : INPUT PARAMETERS:
10352 : Network - neural network with initialized geometry
10353 : XY - training set
10354 : NPoints - training set size
10355 : Decay - weight decay constant, >=0.001
10356 : Decay term 'Decay*||Weights||^2' is added to error
10357 : function.
10358 : If you don't know what Decay to choose, use 0.001.
10359 : Restarts - number of restarts from random position, >0.
10360 : If you don't know what Restarts to choose, use 2.
10361 : WStep - stopping criterion. Algorithm stops if step size is
10362 : less than WStep. Recommended value - 0.01. Zero step
10363 : size means stopping after MaxIts iterations.
10364 : MaxIts - stopping criterion. Algorithm stops after MaxIts
10365 : iterations (NOT gradient calculations). Zero MaxIts
10366 : means stopping when step is sufficiently small.
10367 :
10368 : OUTPUT PARAMETERS:
10369 : Network - trained neural network.
10370 : Info - return code:
10371 : * -8, if both WStep=0 and MaxIts=0
10372 : * -2, if there is a point with class number
10373 : outside of [0..NOut-1].
10374 : * -1, if wrong parameters specified
10375 : (NPoints<0, Restarts<1).
10376 : * 2, if task has been solved.
10377 : Rep - training report
10378 :
10379 : -- ALGLIB --
10380 : Copyright 09.12.2007 by Bochkanov Sergey
10381 : *************************************************************************/
10382 0 : void mlptrainlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, const xparams _xparams)
10383 : {
10384 : jmp_buf _break_jump;
10385 : alglib_impl::ae_state _alglib_env_state;
10386 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10387 0 : if( setjmp(_break_jump) )
10388 : {
10389 : #if !defined(AE_NO_EXCEPTIONS)
10390 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10391 : #else
10392 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10393 : return;
10394 : #endif
10395 : }
10396 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10397 0 : if( _xparams.flags!=0x0 )
10398 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10399 0 : alglib_impl::mlptrainlbfgs(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
10400 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10401 0 : return;
10402 : }
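/*************************************************************************
Usage sketch (illustrative only; names are hypothetical): L-BFGS
training with step-size stopping (WStep=0.01, the recommended value)
and no iteration cap (MaxIts=0).

    #include "dataanalysis.h"

    void train_lbfgs(alglib::multilayerperceptron &net,
                     const alglib::real_2d_array &xy,
                     alglib::ae_int_t npoints)
    {
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlptrainlbfgs(net, xy, npoints, 0.001, 2, 0.01, 0, info, rep);
    }
*************************************************************************/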
10403 :
10404 : /*************************************************************************
10405 : Neural network training using early stopping (base algorithm - L-BFGS with
10406 : regularization).
10407 :
10408 : INPUT PARAMETERS:
10409 : Network - neural network with initialized geometry
10410 : TrnXY - training set
10411 : TrnSize - training set size, TrnSize>0
10412 : ValXY - validation set
10413 : ValSize - validation set size, ValSize>0
10414 : Decay - weight decay constant, >=0.001
10415 : Decay term 'Decay*||Weights||^2' is added to error
10416 : function.
10417 : If you don't know what Decay to choose, use 0.001.
10418 : Restarts - number of restarts, either:
10419 : * strictly positive number - algorithm makes the specified
10420 : number of restarts from random positions.
10421 : * -1, in which case algorithm makes exactly one run
10422 : from the initial state of the network (no randomization).
10423 : If you don't know what Restarts to choose, choose one
10424 : of the following:
10425 : * -1 (deterministic start)
10426 : * +1 (one random restart)
10427 : * +5 (moderate amount of random restarts)
10428 :
10429 : OUTPUT PARAMETERS:
10430 : Network - trained neural network.
10431 : Info - return code:
10432 : * -2, if there is a point with class number
10433 : outside of [0..NOut-1].
10434 : * -1, if wrong parameters specified
10435 : (NPoints<0, Restarts<1, ...).
10436 : * 2, task has been solved, stopping criterion met -
10437 : sufficiently small step size. Not expected (we
10438 : use EARLY stopping) but possible and not an
10439 : error.
10440 : * 6, task has been solved, stopping criterion met -
10441 : increasing of validation set error.
10442 : Rep - training report
10443 :
10444 : NOTE:
10445 :
10446 : Algorithm stops if validation set error increases for long enough or
10447 : step size is small enough (there are tasks where validation set error
10448 : may decrease indefinitely). In any case, the solution returned
10449 : corresponds to the minimum of validation set error.
10450 :
10451 : -- ALGLIB --
10452 : Copyright 10.03.2009 by Bochkanov Sergey
10453 : *************************************************************************/
10454 0 : void mlptraines(const multilayerperceptron &network, const real_2d_array &trnxy, const ae_int_t trnsize, const real_2d_array &valxy, const ae_int_t valsize, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
10455 : {
10456 : jmp_buf _break_jump;
10457 : alglib_impl::ae_state _alglib_env_state;
10458 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10459 0 : if( setjmp(_break_jump) )
10460 : {
10461 : #if !defined(AE_NO_EXCEPTIONS)
10462 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10463 : #else
10464 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10465 : return;
10466 : #endif
10467 : }
10468 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10469 0 : if( _xparams.flags!=0x0 )
10470 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10471 0 : alglib_impl::mlptraines(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(trnxy.c_ptr()), trnsize, const_cast<alglib_impl::ae_matrix*>(valxy.c_ptr()), valsize, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
10472 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10473 0 : return;
10474 : }
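 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : A minimal illustration of mlptraines() with an explicit train/validation
 : split. mlpcreate1() from the MLPBASE interface is assumed; the data is
 : illustrative only.
 : 
 :     alglib::real_2d_array trnxy("[[0.0,0.0],[0.3,0.3],[0.6,0.6],[0.9,0.9]]");
 :     alglib::real_2d_array valxy("[[0.2,0.2],[0.7,0.7]]");
 :     alglib::multilayerperceptron net;
 :     alglib::mlpreport rep;
 :     alglib::ae_int_t info;
 :     alglib::mlpcreate1(1, 3, 1, net);       // 1 input, 3 hidden, 1 output
 :     alglib::mlptraines(net, trnxy, 4, valxy, 2,
 :         0.001, 3,                           // Decay=0.001, Restarts=3
 :         info, rep);
 :     // info==2 (small step) or info==6 (validation error grew) on success
 : *************************************************************************/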
10475 :
10476 : /*************************************************************************
10477 : Cross-validation estimate of generalization error.
10478 :
10479 : Base algorithm - L-BFGS.
10480 :
10481 : INPUT PARAMETERS:
10482 : Network - neural network with initialized geometry. Network is
10483 : not changed during cross-validation - it is used only
10484 : as a representative of its architecture.
10485 : XY - training set.
10486 : SSize - training set size
10487 : Decay - weight decay, same as in MLPTrainLBFGS
10488 : Restarts - number of restarts, >0.
10489 : restarts are counted for each partition separately, so
10490 : total number of restarts will be Restarts*FoldsCount.
10491 : WStep - stopping criterion, same as in MLPTrainLBFGS
10492 : MaxIts - stopping criterion, same as in MLPTrainLBFGS
10493 : FoldsCount - number of folds in k-fold cross-validation,
10494 : 2<=FoldsCount<=SSize.
10495 : recommended value: 10.
10496 :
10497 : OUTPUT PARAMETERS:
10498 : Info - return code, same as in MLPTrainLBFGS
10499 : Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
10500 : CVRep - generalization error estimates
10501 :
10502 : -- ALGLIB --
10503 : Copyright 09.12.2007 by Bochkanov Sergey
10504 : *************************************************************************/
10505 0 : void mlpkfoldcvlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams)
10506 : {
10507 : jmp_buf _break_jump;
10508 : alglib_impl::ae_state _alglib_env_state;
10509 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10510 0 : if( setjmp(_break_jump) )
10511 : {
10512 : #if !defined(AE_NO_EXCEPTIONS)
10513 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10514 : #else
10515 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10516 : return;
10517 : #endif
10518 : }
10519 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10520 0 : if( _xparams.flags!=0x0 )
10521 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10522 0 : alglib_impl::mlpkfoldcvlbfgs(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, foldscount, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(cvrep.c_ptr()), &_alglib_env_state);
10523 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10524 0 : return;
10525 : }
10526 :
10527 : /*************************************************************************
10528 : Cross-validation estimate of generalization error.
10529 :
10530 : Base algorithm - Levenberg-Marquardt.
10531 :
10532 : INPUT PARAMETERS:
10533 : Network - neural network with initialized geometry. Network is
10534 : not changed during cross-validation - it is used only
10535 : as a representative of its architecture.
10536 : XY - training set.
10537 : SSize - training set size
10538 : Decay - weight decay, same as in MLPTrainLBFGS
10539 : Restarts - number of restarts, >0.
10540 : restarts are counted for each partition separately, so
10541 : total number of restarts will be Restarts*FoldsCount.
10542 : FoldsCount - number of folds in k-fold cross-validation,
10543 : 2<=FoldsCount<=SSize.
10544 : recommended value: 10.
10545 :
10546 : OUTPUT PARAMETERS:
10547 : Info - return code, same as in MLPTrainLBFGS
10548 : Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
10549 : CVRep - generalization error estimates
10550 :
10551 : -- ALGLIB --
10552 : Copyright 09.12.2007 by Bochkanov Sergey
10553 : *************************************************************************/
10554 0 : void mlpkfoldcvlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams)
10555 : {
10556 : jmp_buf _break_jump;
10557 : alglib_impl::ae_state _alglib_env_state;
10558 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10559 0 : if( setjmp(_break_jump) )
10560 : {
10561 : #if !defined(AE_NO_EXCEPTIONS)
10562 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10563 : #else
10564 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10565 : return;
10566 : #endif
10567 : }
10568 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10569 0 : if( _xparams.flags!=0x0 )
10570 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10571 0 : alglib_impl::mlpkfoldcvlm(const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, foldscount, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(cvrep.c_ptr()), &_alglib_env_state);
10572 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10573 0 : return;
10574 : }
10575 :
10576 : /*************************************************************************
10577 : This function estimates generalization error using cross-validation on the
10578 : current dataset with current training settings.
10579 :
10580 : ! COMMERCIAL EDITION OF ALGLIB:
10581 : !
10582 : ! Commercial Edition of ALGLIB includes following important improvements
10583 : ! of this function:
10584 : ! * high-performance native backend with same C# interface (C# version)
10585 : ! * multithreading support (C++ and C# versions)
10586 : !
10587 : ! We recommend you to read 'Working with commercial version' section of
10588 : ! ALGLIB Reference Manual in order to find out how to use performance-
10589 : ! related features provided by commercial edition of ALGLIB.
10590 :
10591 : INPUT PARAMETERS:
10592 : S - trainer object
10593 : Network - neural network. It must have same number of inputs and
10594 : outputs/classes as was specified during creation of the
10595 : trainer object. Network is not changed during cross-
10596 : validation and is not trained - it is used only as
10597 : representative of its architecture. I.e., we estimate
10598 : generalization properties of ARCHITECTURE, not some
10599 : specific network.
10600 : NRestarts - number of restarts, >=0:
10601 : * NRestarts>0 means that for each cross-validation
10602 : round specified number of random restarts is
10603 : performed, with best network being chosen after
10604 : training.
10605 : * NRestarts=0 is the same as NRestarts=1
10606 : FoldsCount - number of folds in k-fold cross-validation:
10607 : * 2<=FoldsCount<=size of dataset
10608 : * recommended value: 10.
10609 : * values larger than dataset size will be silently
10610 : truncated down to dataset size
10611 :
10612 : OUTPUT PARAMETERS:
10613 : Rep - structure which contains cross-validation estimates:
10614 : * Rep.RelCLSError - fraction of misclassified cases.
10615 : * Rep.AvgCE - average cross-entropy
10616 : * Rep.RMSError - root-mean-square error
10617 : * Rep.AvgError - average error
10618 : * Rep.AvgRelError - average relative error
10619 :
10620 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
10621 : or a subset with only one point was given, zeros are returned as
10622 : estimates.
10623 :
10624 : NOTE: this method performs FoldsCount cross-validation rounds, each one
10625 : with NRestarts random starts. Thus, FoldsCount*NRestarts networks
10626 : are trained in total.
10627 :
10628 : NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.
10629 :
10630 : NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
10631 : contain errors in prediction of posterior probabilities.
10632 :
10633 : -- ALGLIB --
10634 : Copyright 23.07.2012 by Bochkanov Sergey
10635 : *************************************************************************/
10636 0 : void mlpkfoldcv(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, const ae_int_t foldscount, mlpreport &rep, const xparams _xparams)
10637 : {
10638 : jmp_buf _break_jump;
10639 : alglib_impl::ae_state _alglib_env_state;
10640 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10641 0 : if( setjmp(_break_jump) )
10642 : {
10643 : #if !defined(AE_NO_EXCEPTIONS)
10644 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10645 : #else
10646 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10647 : return;
10648 : #endif
10649 : }
10650 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10651 0 : if( _xparams.flags!=0x0 )
10652 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10653 0 : alglib_impl::mlpkfoldcv(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), nrestarts, foldscount, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
10654 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10655 0 : return;
10656 : }
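 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : A minimal illustration of trainer-based cross-validation. The trainer
 : functions appear below in this file; mlpcreate1() from the MLPBASE
 : interface is assumed, and xy/npoints stand for a caller-prepared dataset
 : in the format described for MLPSetDataset().
 : 
 :     alglib::mlptrainer trn;
 :     alglib::multilayerperceptron net;
 :     alglib::mlpreport rep;
 :     alglib::mlpcreatetrainer(2, 1, trn);       // NIn=2, NOut=1 (regression)
 :     alglib::mlpsetdataset(trn, xy, npoints);
 :     alglib::mlpcreate1(2, 5, 1, net);          // used as architecture only
 :     alglib::mlpkfoldcv(trn, net, 5, 10, rep);  // 5 restarts/round, 10 folds
 :     // rep.rmserror, rep.avgerror etc. now hold CV estimates
 : *************************************************************************/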
10657 :
10658 : /*************************************************************************
10659 : Creation of the network trainer object for regression networks
10660 :
10661 : INPUT PARAMETERS:
10662 : NIn - number of inputs, NIn>=1
10663 : NOut - number of outputs, NOut>=1
10664 :
10665 : OUTPUT PARAMETERS:
10666 : S - neural network trainer object.
10667 : This structure can be used to train any regression
10668 : network with NIn inputs and NOut outputs.
10669 :
10670 : -- ALGLIB --
10671 : Copyright 23.07.2012 by Bochkanov Sergey
10672 : *************************************************************************/
10673 0 : void mlpcreatetrainer(const ae_int_t nin, const ae_int_t nout, mlptrainer &s, const xparams _xparams)
10674 : {
10675 : jmp_buf _break_jump;
10676 : alglib_impl::ae_state _alglib_env_state;
10677 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10678 0 : if( setjmp(_break_jump) )
10679 : {
10680 : #if !defined(AE_NO_EXCEPTIONS)
10681 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10682 : #else
10683 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10684 : return;
10685 : #endif
10686 : }
10687 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10688 0 : if( _xparams.flags!=0x0 )
10689 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10690 0 : alglib_impl::mlpcreatetrainer(nin, nout, const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
10691 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10692 0 : return;
10693 : }
10694 :
10695 : /*************************************************************************
10696 : Creation of the network trainer object for classification networks
10697 :
10698 : INPUT PARAMETERS:
10699 : NIn - number of inputs, NIn>=1
10700 : NClasses - number of classes, NClasses>=2
10701 :
10702 : OUTPUT PARAMETERS:
10703 : S - neural network trainer object.
10704 : This structure can be used to train any classification
10705 : network with NIn inputs and NClasses classes.
10706 :
10707 : -- ALGLIB --
10708 : Copyright 23.07.2012 by Bochkanov Sergey
10709 : *************************************************************************/
10710 0 : void mlpcreatetrainercls(const ae_int_t nin, const ae_int_t nclasses, mlptrainer &s, const xparams _xparams)
10711 : {
10712 : jmp_buf _break_jump;
10713 : alglib_impl::ae_state _alglib_env_state;
10714 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10715 0 : if( setjmp(_break_jump) )
10716 : {
10717 : #if !defined(AE_NO_EXCEPTIONS)
10718 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10719 : #else
10720 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10721 : return;
10722 : #endif
10723 : }
10724 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10725 0 : if( _xparams.flags!=0x0 )
10726 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10727 0 : alglib_impl::mlpcreatetrainercls(nin, nclasses, const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
10728 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10729 0 : return;
10730 : }
10731 :
10732 : /*************************************************************************
10733 : This function sets "current dataset" of the trainer object to one passed
10734 : by user.
10735 :
10736 : INPUT PARAMETERS:
10737 : S - trainer object
10738 : XY - training set, see below for information on the
10739 : training set format. This function checks correctness
10740 : of the dataset (no NANs/INFs, class numbers are
10741 : correct) and throws exception when incorrect dataset
10742 : is passed.
10743 : NPoints - points count, >=0.
10744 :
10745 : DATASET FORMAT:
10746 :
10747 : This function uses two different dataset formats - one for regression
10748 : networks, another one for classification networks.
10749 :
10750 : For regression networks with NIn inputs and NOut outputs the following
10751 : dataset format is used:
10752 : * dataset is given by NPoints*(NIn+NOut) matrix
10753 : * each row corresponds to one example
10754 : * first NIn columns are inputs, next NOut columns are outputs
10755 :
10756 : For classification networks with NIn inputs and NClasses classes the
10757 : following dataset format is used:
10758 : * dataset is given by NPoints*(NIn+1) matrix
10759 : * each row corresponds to one example
10760 : * first NIn columns are inputs, last column stores class number (from 0 to
10761 : NClasses-1).
10762 :
10763 : -- ALGLIB --
10764 : Copyright 23.07.2012 by Bochkanov Sergey
10765 : *************************************************************************/
10766 0 : void mlpsetdataset(const mlptrainer &s, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
10767 : {
10768 : jmp_buf _break_jump;
10769 : alglib_impl::ae_state _alglib_env_state;
10770 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10771 0 : if( setjmp(_break_jump) )
10772 : {
10773 : #if !defined(AE_NO_EXCEPTIONS)
10774 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10775 : #else
10776 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10777 : return;
10778 : #endif
10779 : }
10780 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10781 0 : if( _xparams.flags!=0x0 )
10782 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10783 0 : alglib_impl::mlpsetdataset(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
10784 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10785 0 : return;
10786 : }
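 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : The two dataset layouts described above, shown as literal matrices.
 : Values are illustrative; 'rtrn' and 'ctrn' stand for trainers created
 : with mlpcreatetrainer() and mlpcreatetrainercls() respectively.
 : 
 :     // regression trainer, NIn=2, NOut=1: each row is [in0, in1, out0]
 :     alglib::real_2d_array rxy("[[0.1,0.2,0.7],[0.3,0.4,0.9]]");
 :     alglib::mlpsetdataset(rtrn, rxy, 2);
 : 
 :     // classification trainer, NIn=2, NClasses=3: each row is
 :     // [in0, in1, class], class stored as a real value 0..NClasses-1
 :     alglib::real_2d_array cxy("[[0.1,0.2,0],[0.3,0.4,2]]");
 :     alglib::mlpsetdataset(ctrn, cxy, 2);
 : *************************************************************************/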
10787 :
10788 : /*************************************************************************
10789 : This function sets "current dataset" of the trainer object to one passed
10790 : by user (sparse matrix is used to store dataset).
10791 :
10792 : INPUT PARAMETERS:
10793 : S - trainer object
10794 : XY - training set, see below for information on the
10795 : training set format. This function checks correctness
10796 : of the dataset (no NANs/INFs, class numbers are
10797 : correct) and throws exception when incorrect dataset
10798 : is passed. Any sparse storage format can be used:
10799 : Hash-table, CRS...
10800 : NPoints - points count, >=0
10801 :
10802 : DATASET FORMAT:
10803 :
10804 : This function uses two different dataset formats - one for regression
10805 : networks, another one for classification networks.
10806 :
10807 : For regression networks with NIn inputs and NOut outputs the following
10808 : dataset format is used:
10809 : * dataset is given by NPoints*(NIn+NOut) matrix
10810 : * each row corresponds to one example
10811 : * first NIn columns are inputs, next NOut columns are outputs
10812 :
10813 : For classification networks with NIn inputs and NClasses classes the
10814 : following dataset format is used:
10815 : * dataset is given by NPoints*(NIn+1) matrix
10816 : * each row corresponds to one example
10817 : * first NIn columns are inputs, last column stores class number (from 0 to
10818 : NClasses-1).
10819 :
10820 : -- ALGLIB --
10821 : Copyright 23.07.2012 by Bochkanov Sergey
10822 : *************************************************************************/
10823 0 : void mlpsetsparsedataset(const mlptrainer &s, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams)
10824 : {
10825 : jmp_buf _break_jump;
10826 : alglib_impl::ae_state _alglib_env_state;
10827 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10828 0 : if( setjmp(_break_jump) )
10829 : {
10830 : #if !defined(AE_NO_EXCEPTIONS)
10831 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10832 : #else
10833 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10834 : return;
10835 : #endif
10836 : }
10837 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10838 0 : if( _xparams.flags!=0x0 )
10839 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10840 0 : alglib_impl::mlpsetsparsedataset(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::sparsematrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
10841 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10842 0 : return;
10843 : }
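 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : Passing a sparse dataset, assuming sparsecreate()/sparseset() from the
 : ALGLIB sparse matrix interface (declared in the linear algebra package,
 : not in this file); 'trn' is a trainer created for NIn=2, NOut=1:
 : 
 :     alglib::sparsematrix xy;
 :     alglib::sparsecreate(100, 3, xy);          // 100 points, NIn+NOut=3 cols
 :     alglib::sparseset(xy, 0, 0, 0.5);          // fill nonzero entries only
 :     alglib::sparseset(xy, 0, 2, 1.0);
 :     // ... remaining nonzero entries ...
 :     alglib::mlpsetsparsedataset(trn, xy, 100);
 : *************************************************************************/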
10844 :
10845 : /*************************************************************************
10846 : This function sets weight decay coefficient which is used for training.
10847 :
10848 : INPUT PARAMETERS:
10849 : S - trainer object
10850 : Decay - weight decay coefficient, >=0. Weight decay term
10851 : 'Decay*||Weights||^2' is added to error function. If
10852 : you don't know what Decay to choose, use 1.0E-3.
10853 : Weight decay can be set to zero, in this case network
10854 : is trained without weight decay.
10855 :
10856 : NOTE: by default network uses some small nonzero value for weight decay.
10857 :
10858 : -- ALGLIB --
10859 : Copyright 23.07.2012 by Bochkanov Sergey
10860 : *************************************************************************/
10861 0 : void mlpsetdecay(const mlptrainer &s, const double decay, const xparams _xparams)
10862 : {
10863 : jmp_buf _break_jump;
10864 : alglib_impl::ae_state _alglib_env_state;
10865 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10866 0 : if( setjmp(_break_jump) )
10867 : {
10868 : #if !defined(AE_NO_EXCEPTIONS)
10869 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10870 : #else
10871 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10872 : return;
10873 : #endif
10874 : }
10875 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10876 0 : if( _xparams.flags!=0x0 )
10877 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10878 0 : alglib_impl::mlpsetdecay(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), decay, &_alglib_env_state);
10879 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10880 0 : return;
10881 : }
10882 :
10883 : /*************************************************************************
10884 : This function sets stopping criteria for the optimizer.
10885 :
10886 : INPUT PARAMETERS:
10887 : S - trainer object
10888 : WStep - stopping criterion. Algorithm stops if step size is
10889 : less than WStep. Recommended value - 0.01. Zero step
10890 : size means stopping after MaxIts iterations.
10891 : WStep>=0.
10892 : MaxIts - stopping criterion. Algorithm stops after MaxIts
10893 : epochs (full passes over entire dataset). Zero MaxIts
10894 : means stopping when step is sufficiently small.
10895 : MaxIts>=0.
10896 :
10897 : NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also
10898 : used when MLPSetCond() is called with WStep=0 and MaxIts=0.
10899 :
10900 : NOTE: these stopping criteria are used for all kinds of neural training -
10901 : from "conventional" networks to early stopping ensembles. When used
10902 : for "conventional" networks, they are used as the only stopping
10903 : criteria. When combined with early stopping, they are used as ADDITIONAL
10904 : stopping criteria which can terminate early stopping algorithm.
10905 :
10906 : -- ALGLIB --
10907 : Copyright 23.07.2012 by Bochkanov Sergey
10908 : *************************************************************************/
10909 0 : void mlpsetcond(const mlptrainer &s, const double wstep, const ae_int_t maxits, const xparams _xparams)
10910 : {
10911 : jmp_buf _break_jump;
10912 : alglib_impl::ae_state _alglib_env_state;
10913 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10914 0 : if( setjmp(_break_jump) )
10915 : {
10916 : #if !defined(AE_NO_EXCEPTIONS)
10917 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10918 : #else
10919 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10920 : return;
10921 : #endif
10922 : }
10923 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10924 0 : if( _xparams.flags!=0x0 )
10925 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10926 0 : alglib_impl::mlpsetcond(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), wstep, maxits, &_alglib_env_state);
10927 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10928 0 : return;
10929 : }
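 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : Typical configurations of the stopping criteria described above ('trn'
 : is a trainer object):
 : 
 :     alglib::mlpsetcond(trn, 0.01, 0);      // stop on step size below 0.01
 :     alglib::mlpsetcond(trn, 0.0, 100);     // stop after exactly 100 epochs
 :     alglib::mlpsetcond(trn, 0.0, 0);       // restore defaults (WStep=0.005)
 : *************************************************************************/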
10930 :
10931 : /*************************************************************************
10932 : This function sets training algorithm: batch training using L-BFGS will be
10933 : used.
10934 :
10935 : This algorithm:
10936 : * is the most robust for small-scale problems, but may be too slow for
10937 : large-scale ones
10938 : * performs a full pass through the dataset before performing a step
10939 : * uses conditions specified by MLPSetCond() for stopping
10940 : * is the default one used by the trainer object
10941 :
10942 : INPUT PARAMETERS:
10943 : S - trainer object
10944 :
10945 : -- ALGLIB --
10946 : Copyright 23.07.2012 by Bochkanov Sergey
10947 : *************************************************************************/
10948 0 : void mlpsetalgobatch(const mlptrainer &s, const xparams _xparams)
10949 : {
10950 : jmp_buf _break_jump;
10951 : alglib_impl::ae_state _alglib_env_state;
10952 0 : alglib_impl::ae_state_init(&_alglib_env_state);
10953 0 : if( setjmp(_break_jump) )
10954 : {
10955 : #if !defined(AE_NO_EXCEPTIONS)
10956 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
10957 : #else
10958 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
10959 : return;
10960 : #endif
10961 : }
10962 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
10963 0 : if( _xparams.flags!=0x0 )
10964 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
10965 0 : alglib_impl::mlpsetalgobatch(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), &_alglib_env_state);
10966 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
10967 0 : return;
10968 : }
10969 :
10970 : /*************************************************************************
10971 : This function trains neural network passed to this function, using current
10972 : dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
10973 : and current training settings. Training from NRestarts random starting
10974 : positions is performed, best network is chosen.
10975 :
10976 : Training is performed using current training algorithm.
10977 :
10978 : ! COMMERCIAL EDITION OF ALGLIB:
10979 : !
10980 : ! Commercial Edition of ALGLIB includes following important improvements
10981 : ! of this function:
10982 : ! * high-performance native backend with same C# interface (C# version)
10983 : ! * multithreading support (C++ and C# versions)
10984 : !
10985 : ! We recommend you to read 'Working with commercial version' section of
10986 : ! ALGLIB Reference Manual in order to find out how to use performance-
10987 : ! related features provided by commercial edition of ALGLIB.
10988 :
10989 : INPUT PARAMETERS:
10990 : S - trainer object
10991 : Network - neural network. It must have same number of inputs and
10992 : outputs/classes as was specified during creation of the
10993 : trainer object.
10994 : NRestarts - number of restarts, >=0:
10995 : * NRestarts>0 means that specified number of random
10996 : restarts are performed, best network is chosen after
10997 : training
10998 : * NRestarts=0 means that current state of the network
10999 : is used for training.
11000 :
11001 : OUTPUT PARAMETERS:
11002 : Network - trained network
11003 :
11004 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
11005 : network is filled with zero values. The same behavior applies to functions
11006 : MLPStartTraining and MLPContinueTraining.
11007 :
11008 : NOTE: this method uses sum-of-squares error function for training.
11009 :
11010 : -- ALGLIB --
11011 : Copyright 23.07.2012 by Bochkanov Sergey
11012 : *************************************************************************/
11013 0 : void mlptrainnetwork(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams)
11014 : {
11015 : jmp_buf _break_jump;
11016 : alglib_impl::ae_state _alglib_env_state;
11017 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11018 0 : if( setjmp(_break_jump) )
11019 : {
11020 : #if !defined(AE_NO_EXCEPTIONS)
11021 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11022 : #else
11023 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11024 : return;
11025 : #endif
11026 : }
11027 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11028 0 : if( _xparams.flags!=0x0 )
11029 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11030 0 : alglib_impl::mlptrainnetwork(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), nrestarts, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
11031 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11032 0 : return;
11033 : }
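 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : An end-to-end illustration of the trainer workflow: create a trainer,
 : attach a dataset, configure training and run it with random restarts.
 : mlpcreatec1() (classification network constructor) comes from the
 : MLPBASE interface and is assumed here; data is illustrative only.
 : 
 :     alglib::real_2d_array xy("[[0.1,0.2,0],[0.8,0.7,1],[0.2,0.9,1],[0.9,0.1,0]]");
 :     alglib::mlptrainer trn;
 :     alglib::multilayerperceptron net;
 :     alglib::mlpreport rep;
 :     alglib::mlpcreatetrainercls(2, 2, trn);    // NIn=2, NClasses=2
 :     alglib::mlpsetdataset(trn, xy, 4);
 :     alglib::mlpsetdecay(trn, 0.001);
 :     alglib::mlpsetcond(trn, 0.01, 0);
 :     alglib::mlpcreatec1(2, 5, 2, net);         // 5 hidden neurons
 :     alglib::mlptrainnetwork(trn, net, 5, rep); // 5 random restarts
 :     // rep.relclserror holds training-set classification error
 : *************************************************************************/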
11034 :
11035 : /*************************************************************************
11036 : IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
11037 : not recommend that you use it unless you are pretty sure that you
11038 : need the ability to monitor training progress.
11039 :
11040 : This function performs step-by-step training of the neural network. Here
11041 : "step-by-step" means that training starts with MLPStartTraining() call,
11042 : and then user subsequently calls MLPContinueTraining() to perform one more
11043 : iteration of the training.
11044 :
11045 : After call to this function trainer object remembers network and is ready
11046 : to train it. However, no training is performed until first call to
11047 : MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
11048 : will advance training progress one iteration further.
11049 :
11050 : EXAMPLE:
11051 : >
11052 : > ...initialize network and trainer object....
11053 : >
11054 : > MLPStartTraining(Trainer, Network, True)
11055 : > while MLPContinueTraining(Trainer, Network) do
11056 : > ...visualize training progress...
11057 : >
11058 :
11059 : INPUT PARAMETERS:
11060 : S - trainer object
11061 : Network - neural network. It must have same number of inputs and
11062 : outputs/classes as was specified during creation of the
11063 : trainer object.
11064 : RandomStart - randomize network before training or not:
11065 : * True means that network is randomized and its
11066 : initial state (one which was passed to the trainer
11067 : object) is lost.
11068 : * False means that training is started from the
11069 : current state of the network
11070 :
11071 : OUTPUT PARAMETERS:
11072 : Network - neural network which is ready for training (weights are
11073 : initialized, preprocessor is initialized using current
11074 : training set)
11075 :
11076 : NOTE: this method uses sum-of-squares error function for training.
11077 :
11078 : NOTE: it is expected that trainer object settings are NOT changed during
11079 : step-by-step training, i.e. no one changes stopping criteria or
11080 : training set during training. It is possible and there is no defense
11081 : against such actions, but algorithm behavior in such cases is
11082 : undefined and can be unpredictable.
11083 :
11084 : -- ALGLIB --
11085 : Copyright 23.07.2012 by Bochkanov Sergey
11086 : *************************************************************************/
11087 0 : void mlpstarttraining(const mlptrainer &s, const multilayerperceptron &network, const bool randomstart, const xparams _xparams)
11088 : {
11089 : jmp_buf _break_jump;
11090 : alglib_impl::ae_state _alglib_env_state;
11091 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11092 0 : if( setjmp(_break_jump) )
11093 : {
11094 : #if !defined(AE_NO_EXCEPTIONS)
11095 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11096 : #else
11097 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11098 : return;
11099 : #endif
11100 : }
11101 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11102 0 : if( _xparams.flags!=0x0 )
11103 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11104 0 : alglib_impl::mlpstarttraining(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), randomstart, &_alglib_env_state);
11105 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11106 0 : return;
11107 : }
11108 :
11109 : /*************************************************************************
11110 : IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
11111 : not recommend that you use it unless you are pretty sure that you
11112 : need the ability to monitor training progress.
11113 :
11114 : ! COMMERCIAL EDITION OF ALGLIB:
11115 : !
11116 : ! Commercial Edition of ALGLIB includes following important improvements
11117 : ! of this function:
11118 : ! * high-performance native backend with same C# interface (C# version)
11119 : ! * multithreading support (C++ and C# versions)
11120 : !
11121 : ! We recommend you to read 'Working with commercial version' section of
11122 : ! ALGLIB Reference Manual in order to find out how to use performance-
11123 : ! related features provided by commercial edition of ALGLIB.
11124 :
11125 : This function performs step-by-step training of the neural network. Here
11126 : "step-by-step" means that training starts with MLPStartTraining() call,
11127 : and then user subsequently calls MLPContinueTraining() to perform one more
11128 : iteration of the training.
11129 :
11130 : This function performs one more iteration of the training and returns
11131 : either True (training continues) or False (training stopped). In case True
11132 : was returned, Network weights are updated according to the current state
11133 : of the optimization progress. In case False was returned, no additional
11134 : updates are performed (the previous update of the network weights moved
11135 : us to the final point, and no additional updates are needed).
11136 :
11137 : EXAMPLE:
11138 : >
11139 : > [initialize network and trainer object]
11140 : >
11141 : > MLPStartTraining(Trainer, Network, True)
11142 : > while MLPContinueTraining(Trainer, Network) do
11143 : > [visualize training progress]
11144 : >
11145 :
11146 : INPUT PARAMETERS:
11147 : S - trainer object
11148 : Network - neural network structure, which is used to store
11149 : current state of the training process.
11150 :
11151 : OUTPUT PARAMETERS:
11152 : Network - weights of the neural network are rewritten by the
11153 : current approximation.
11154 :
11155 : NOTE: this method uses sum-of-squares error function for training.
11156 :
11157 : NOTE: it is expected that trainer object settings are NOT changed during
11158 : step-by-step training, i.e. no one changes stopping criteria or
11159 : training set during training. It is possible and there is no defense
11160 : against such actions, but algorithm behavior in such cases is
11161 : undefined and can be unpredictable.
11162 :
11163 : NOTE: It is expected that Network is the same one which was passed to
11164 : MLPStartTraining() function. However, THIS function checks only
11165 : the following:
11166 : * that number of network inputs is consistent with trainer object
11167 : settings
11168 : * that number of network outputs/classes is consistent with trainer
11169 : object settings
11170 : * that number of network weights is the same as number of weights in
11171 : the network passed to MLPStartTraining() function
11172 : Exception is thrown when these conditions are violated.
11173 :
11174 : It is also expected that you do not change state of the network on
11175 : your own - the only party who has the right to change the network during
11176 : its training is the trainer object. Any attempt to interfere with the trainer
11177 : may lead to unpredictable results.
11178 :
11179 :
11180 : -- ALGLIB --
11181 : Copyright 23.07.2012 by Bochkanov Sergey
11182 : *************************************************************************/
11183 0 : bool mlpcontinuetraining(const mlptrainer &s, const multilayerperceptron &network, const xparams _xparams)
11184 : {
11185 : jmp_buf _break_jump;
11186 : alglib_impl::ae_state _alglib_env_state;
11187 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11188 0 : if( setjmp(_break_jump) )
11189 : {
11190 : #if !defined(AE_NO_EXCEPTIONS)
11191 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11192 : #else
11193 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11194 : return 0;
11195 : #endif
11196 : }
11197 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11198 0 : if( _xparams.flags!=0x0 )
11199 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11200 0 : ae_bool result = alglib_impl::mlpcontinuetraining(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::multilayerperceptron*>(network.c_ptr()), &_alglib_env_state);
11201 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11202 0 : return *(reinterpret_cast<bool*>(&result));
11203 : }
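 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : The EXAMPLE pseudocode above, written out in C++ ('trn' is a configured
 : trainer with a dataset attached, 'net' a compatible network):
 : 
 :     alglib::mlpstarttraining(trn, net, true);      // randomize and start
 :     while( alglib::mlpcontinuetraining(trn, net) )
 :     {
 :         // one more iteration was performed; net holds the current
 :         // approximation - inspect or visualize its weights here
 :     }
 :     // loop exits with net at the final point of the optimizer
 : *************************************************************************/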
11204 :
11205 : /*************************************************************************
11206 : Training a neural network ensemble using bootstrap aggregating (bagging).
11207 : Modified Levenberg-Marquardt algorithm is used as base training method.
11208 :
11209 : INPUT PARAMETERS:
11210 : Ensemble - model with initialized geometry
11211 : XY - training set
11212 : NPoints - training set size
11213 : Decay - weight decay coefficient, >=0.001
11214 : Restarts - restarts, >0.
11215 :
11216 : OUTPUT PARAMETERS:
11217 : Ensemble - trained model
11218 : Info - return code:
11219 : * -2, if there is a point with class number
11220 : outside of [0..NClasses-1].
11221 : * -1, if incorrect parameters were passed
11222 : (NPoints<0, Restarts<1).
11223 : * 2, if task has been solved.
11224 : Rep - training report.
11225 : OOBErrors - out-of-bag generalization error estimate
11226 :
11227 : -- ALGLIB --
11228 : Copyright 17.02.2009 by Bochkanov Sergey
11229 : *************************************************************************/
11230 0 : void mlpebagginglm(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams)
11231 : {
11232 : jmp_buf _break_jump;
11233 : alglib_impl::ae_state _alglib_env_state;
11234 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11235 0 : if( setjmp(_break_jump) )
11236 : {
11237 : #if !defined(AE_NO_EXCEPTIONS)
11238 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11239 : #else
11240 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11241 : return;
11242 : #endif
11243 : }
11244 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11245 0 : if( _xparams.flags!=0x0 )
11246 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11247 0 : alglib_impl::mlpebagginglm(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(ooberrors.c_ptr()), &_alglib_env_state);
11248 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11249 0 : return;
11250 : }
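 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : A minimal illustration of mlpebagginglm(), assuming the ensemble
 : constructor mlpecreate1() from the MLPE interface (declared elsewhere in
 : dataanalysis.h); xy/npoints stand for a caller-prepared dataset:
 : 
 :     alglib::mlpensemble ens;
 :     alglib::mlpreport rep;
 :     alglib::mlpcvreport oob;
 :     alglib::ae_int_t info;
 :     alglib::mlpecreate1(2, 5, 1, 10, ens);     // ensemble of 10 networks
 :     alglib::mlpebagginglm(ens, xy, npoints, 0.001, 2, info, rep, oob);
 :     // info==2 on success; oob holds out-of-bag error estimates
 : *************************************************************************/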
11251 :
11252 : /*************************************************************************
11253 : Training a neural network ensemble using bootstrap aggregating (bagging).
11254 : L-BFGS algorithm is used as base training method.
11255 :
11256 : INPUT PARAMETERS:
11257 : Ensemble - model with initialized geometry
11258 : XY - training set
11259 : NPoints - training set size
11260 : Decay - weight decay coefficient, >=0.001
11261 : Restarts - restarts, >0.
11262 : WStep - stopping criterion, same as in MLPTrainLBFGS
11263 : MaxIts - stopping criterion, same as in MLPTrainLBFGS
11264 :
11265 : OUTPUT PARAMETERS:
11266 : Ensemble - trained model
11267 : Info - return code:
11268 : * -8, if both WStep=0 and MaxIts=0
11269 : * -2, if there is a point with class number
11270 : outside of [0..NClasses-1].
11271 : * -1, if incorrect parameters were passed
11272 : (NPoints<0, Restarts<1).
11273 : * 2, if task has been solved.
11274 : Rep - training report.
11275 : OOBErrors - out-of-bag generalization error estimate
11276 :
11277 : -- ALGLIB --
11278 : Copyright 17.02.2009 by Bochkanov Sergey
11279 : *************************************************************************/
11280 0 : void mlpebagginglbfgs(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams)
11281 : {
11282 : jmp_buf _break_jump;
11283 : alglib_impl::ae_state _alglib_env_state;
11284 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11285 0 : if( setjmp(_break_jump) )
11286 : {
11287 : #if !defined(AE_NO_EXCEPTIONS)
11288 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11289 : #else
11290 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11291 : return;
11292 : #endif
11293 : }
11294 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11295 0 : if( _xparams.flags!=0x0 )
11296 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11297 0 : alglib_impl::mlpebagginglbfgs(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, wstep, maxits, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), const_cast<alglib_impl::mlpcvreport*>(ooberrors.c_ptr()), &_alglib_env_state);
11298 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11299 0 : return;
11300 : }
11301 :
11302 : /*************************************************************************
11303 : Training a neural network ensemble using early stopping.
11304 :
11305 : INPUT PARAMETERS:
11306 : Ensemble - model with initialized geometry
11307 : XY - training set
11308 : NPoints - training set size
11309 : Decay - weight decay coefficient, >=0.001
11310 : Restarts - restarts, >0.
11311 :
11312 : OUTPUT PARAMETERS:
11313 : Ensemble - trained model
11314 : Info - return code:
11315 : * -2, if there is a point with class number
11316 : outside of [0..NClasses-1].
11317 : * -1, if incorrect parameters were passed
11318 : (NPoints<0, Restarts<1).
11319 : * 6, if task has been solved.
11320 : Rep - training report.
11321 : OOBErrors - out-of-bag generalization error estimate
11322 :
11323 : -- ALGLIB --
11324 : Copyright 10.03.2009 by Bochkanov Sergey
11325 : *************************************************************************/
11326 0 : void mlpetraines(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams)
11327 : {
11328 : jmp_buf _break_jump;
11329 : alglib_impl::ae_state _alglib_env_state;
11330 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11331 0 : if( setjmp(_break_jump) )
11332 : {
11333 : #if !defined(AE_NO_EXCEPTIONS)
11334 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11335 : #else
11336 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11337 : return;
11338 : #endif
11339 : }
11340 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11341 0 : if( _xparams.flags!=0x0 )
11342 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11343 0 : alglib_impl::mlpetraines(const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, decay, restarts, &info, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
11344 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11345 0 : return;
11346 : }
11347 :
11348 : /*************************************************************************
11349 : This function trains neural network ensemble passed to this function using
11350 : current dataset and early stopping training algorithm. Each early stopping
11351 : round performs NRestarts random restarts (thus, EnsembleSize*NRestarts
11352 : training rounds are performed in total).
11353 :
11354 : ! COMMERCIAL EDITION OF ALGLIB:
11355 : !
11356 : ! Commercial Edition of ALGLIB includes following important improvements
11357 : ! of this function:
11358 : ! * high-performance native backend with same C# interface (C# version)
11359 : ! * multithreading support (C++ and C# versions)
11360 : !
11361 : ! We recommend you to read 'Working with commercial version' section of
11362 : ! ALGLIB Reference Manual in order to find out how to use performance-
11363 : ! related features provided by commercial edition of ALGLIB.
11364 :
11365 : INPUT PARAMETERS:
11366 : S - trainer object;
11367 : Ensemble - neural network ensemble. It must have same number of
11368 : inputs and outputs/classes as was specified during
11369 : creation of the trainer object.
11370 : NRestarts - number of restarts, >=0:
11371 : * NRestarts>0 means that specified number of random
11372 : restarts are performed during each ES round;
11373 : * NRestarts=0 is silently replaced by 1.
11374 :
11375 : OUTPUT PARAMETERS:
11376 : Ensemble - trained ensemble;
11377 : Rep - it contains all types of errors.
11378 :
11379 : NOTE: this training method uses BOTH early stopping and weight decay! So,
11380 : you should select weight decay before starting training just as you
11381 : select it before training "conventional" networks.
11382 :
11383 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
11384 : or a single-point dataset was passed, ensemble is filled with zero
11385 : values.
11386 :
11387 : NOTE: this method uses sum-of-squares error function for training.
11388 :
11389 : -- ALGLIB --
11390 : Copyright 22.08.2012 by Bochkanov Sergey
11391 : *************************************************************************/
11392 0 : void mlptrainensemblees(const mlptrainer &s, const mlpensemble &ensemble, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams)
11393 : {
11394 : jmp_buf _break_jump;
11395 : alglib_impl::ae_state _alglib_env_state;
11396 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11397 0 : if( setjmp(_break_jump) )
11398 : {
11399 : #if !defined(AE_NO_EXCEPTIONS)
11400 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11401 : #else
11402 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11403 : return;
11404 : #endif
11405 : }
11406 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11407 0 : if( _xparams.flags!=0x0 )
11408 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11409 0 : alglib_impl::mlptrainensemblees(const_cast<alglib_impl::mlptrainer*>(s.c_ptr()), const_cast<alglib_impl::mlpensemble*>(ensemble.c_ptr()), nrestarts, const_cast<alglib_impl::mlpreport*>(rep.c_ptr()), &_alglib_env_state);
11410 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11411 0 : return;
11412 : }
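 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : Trainer-based early-stopping ensemble training, assuming mlpecreate1()
 : from the MLPE interface; xy/npoints stand for a caller-prepared dataset:
 : 
 :     alglib::mlptrainer trn;
 :     alglib::mlpensemble ens;
 :     alglib::mlpreport rep;
 :     alglib::mlpcreatetrainer(2, 1, trn);           // NIn=2, NOut=1
 :     alglib::mlpsetdataset(trn, xy, npoints);
 :     alglib::mlpsetdecay(trn, 0.001);               // ES still uses decay
 :     alglib::mlpecreate1(2, 5, 1, 10, ens);         // 10 networks in ensemble
 :     alglib::mlptrainensemblees(trn, ens, 3, rep);  // 3 restarts per ES round
 : *************************************************************************/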
11413 : #endif
11414 :
11415 : #if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
11416 : /*************************************************************************
11417 : This structure is a clusterization engine.
11418 :
11419 : You should not try to access its fields directly.
11420 : Use ALGLIB functions in order to work with this object.
11421 :
11422 : -- ALGLIB --
11423 : Copyright 10.07.2012 by Bochkanov Sergey
11424 : *************************************************************************/
11425 0 : _clusterizerstate_owner::_clusterizerstate_owner()
11426 : {
11427 : jmp_buf _break_jump;
11428 : alglib_impl::ae_state _state;
11429 :
11430 0 : alglib_impl::ae_state_init(&_state);
11431 0 : if( setjmp(_break_jump) )
11432 : {
11433 0 : if( p_struct!=NULL )
11434 : {
11435 0 : alglib_impl::_clusterizerstate_destroy(p_struct);
11436 0 : alglib_impl::ae_free(p_struct);
11437 : }
11438 0 : p_struct = NULL;
11439 : #if !defined(AE_NO_EXCEPTIONS)
11440 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11441 : #else
11442 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11443 : return;
11444 : #endif
11445 : }
11446 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11447 0 : p_struct = NULL;
11448 0 : p_struct = (alglib_impl::clusterizerstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::clusterizerstate), &_state);
11449 0 : memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
11450 0 : alglib_impl::_clusterizerstate_init(p_struct, &_state, ae_false);
11451 0 : ae_state_clear(&_state);
11452 0 : }
11453 :
11454 0 : _clusterizerstate_owner::_clusterizerstate_owner(const _clusterizerstate_owner &rhs)
11455 : {
11456 : jmp_buf _break_jump;
11457 : alglib_impl::ae_state _state;
11458 :
11459 0 : alglib_impl::ae_state_init(&_state);
11460 0 : if( setjmp(_break_jump) )
11461 : {
11462 0 : if( p_struct!=NULL )
11463 : {
11464 0 : alglib_impl::_clusterizerstate_destroy(p_struct);
11465 0 : alglib_impl::ae_free(p_struct);
11466 : }
11467 0 : p_struct = NULL;
11468 : #if !defined(AE_NO_EXCEPTIONS)
11469 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11470 : #else
11471 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11472 : return;
11473 : #endif
11474 : }
11475 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11476 0 : p_struct = NULL;
11477 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: clusterizerstate copy constructor failure (source is not initialized)", &_state);
11478 0 : p_struct = (alglib_impl::clusterizerstate*)alglib_impl::ae_malloc(sizeof(alglib_impl::clusterizerstate), &_state);
11479 0 : memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
11480 0 : alglib_impl::_clusterizerstate_init_copy(p_struct, const_cast<alglib_impl::clusterizerstate*>(rhs.p_struct), &_state, ae_false);
11481 0 : ae_state_clear(&_state);
11482 0 : }
11483 :
11484 0 : _clusterizerstate_owner& _clusterizerstate_owner::operator=(const _clusterizerstate_owner &rhs)
11485 : {
11486 0 : if( this==&rhs )
11487 0 : return *this;
11488 : jmp_buf _break_jump;
11489 : alglib_impl::ae_state _state;
11490 :
11491 0 : alglib_impl::ae_state_init(&_state);
11492 0 : if( setjmp(_break_jump) )
11493 : {
11494 : #if !defined(AE_NO_EXCEPTIONS)
11495 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11496 : #else
11497 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11498 : return *this;
11499 : #endif
11500 : }
11501 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11502 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: clusterizerstate assignment constructor failure (destination is not initialized)", &_state);
11503 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: clusterizerstate assignment constructor failure (source is not initialized)", &_state);
11504 0 : alglib_impl::_clusterizerstate_destroy(p_struct);
11505 0 : memset(p_struct, 0, sizeof(alglib_impl::clusterizerstate));
11506 0 : alglib_impl::_clusterizerstate_init_copy(p_struct, const_cast<alglib_impl::clusterizerstate*>(rhs.p_struct), &_state, ae_false);
11507 0 : ae_state_clear(&_state);
11508 0 : return *this;
11509 : }
11510 :
11511 0 : _clusterizerstate_owner::~_clusterizerstate_owner()
11512 : {
11513 0 : if( p_struct!=NULL )
11514 : {
11515 0 : alglib_impl::_clusterizerstate_destroy(p_struct);
11516 0 : ae_free(p_struct);
11517 : }
11518 0 : }
11519 :
11520 0 : alglib_impl::clusterizerstate* _clusterizerstate_owner::c_ptr()
11521 : {
11522 0 : return p_struct;
11523 : }
11524 :
11525 0 : alglib_impl::clusterizerstate* _clusterizerstate_owner::c_ptr() const
11526 : {
11527 0 : return const_cast<alglib_impl::clusterizerstate*>(p_struct);
11528 : }
11529 0 : clusterizerstate::clusterizerstate() : _clusterizerstate_owner()
11530 : {
11531 0 : }
11532 :
11533 0 : clusterizerstate::clusterizerstate(const clusterizerstate &rhs):_clusterizerstate_owner(rhs)
11534 : {
11535 0 : }
11536 :
11537 0 : clusterizerstate& clusterizerstate::operator=(const clusterizerstate &rhs)
11538 : {
11539 0 : if( this==&rhs )
11540 0 : return *this;
11541 0 : _clusterizerstate_owner::operator=(rhs);
11542 0 : return *this;
11543 : }
11544 :
11545 0 : clusterizerstate::~clusterizerstate()
11546 : {
11547 0 : }
11548 :
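 : /*************************************************************************
 : USAGE SKETCH:
 : 
 : Running agglomerative hierarchical clustering with this engine, assuming
 : clusterizercreate(), clusterizersetpoints() and clusterizerrunahc() from
 : the clustering interface (declared elsewhere in dataanalysis.h); data is
 : illustrative:
 : 
 :     alglib::clusterizerstate s;
 :     alglib::ahcreport rep;
 :     alglib::real_2d_array xy("[[1,1],[1,2],[4,1],[2,3],[4,1.5]]");
 :     alglib::clusterizercreate(s);
 :     alglib::clusterizersetpoints(s, xy, 2);    // disttype=2: Euclidean
 :     alglib::clusterizerrunahc(s, rep);
 :     // rep.z now lists the NPoints-1 merges; rep.p and rep.pm are the
 :     // permutation/merge representation used for dendrogram construction
 : *************************************************************************/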
11549 :
11550 : /*************************************************************************
11551 : This structure is used to store results of the agglomerative hierarchical
11552 : clustering (AHC).
11553 :
11554 : Following information is returned:
11555 :
11556 : * TerminationType - completion code:
11557 : * 1 for successful completion of algorithm
11558 : * -5 inappropriate combination of clustering algorithm and distance
11559 : function was used. As of now, it is possible only when Ward's
11560 : method is called for dataset with non-Euclidean distance function.
11561 : In case negative completion code is returned, other fields of report
11562 : structure are invalid and should not be used.
11563 :
11564 : * NPoints contains number of points in the original dataset
11565 :
11566 : * Z contains information about merges performed (see below). Z contains
11567 : indexes from the original (unsorted) dataset and it can be used when you
11568 : need to know what points were merged. However, it is not convenient when
11569 : you want to build a dendrogram (see below).
11570 :
11571 : * if you want to build a dendrogram, you can use Z, but it is not a good
11572 : option, because Z contains indexes from the unsorted dataset. A dendrogram
11573 : built from such a dataset is likely to have intersections. So, you have to
11574 : reorder your points before building the dendrogram.
11575 : The permutation which reorders points is returned in P. Another
11576 : representation of merges, which is more convenient for dendrogram
11577 : construction, is returned in PM.
11578 :
11579 : * more information on format of Z, P and PM can be found below and in the
11580 : examples from ALGLIB Reference Manual.
11581 :
11582 : FORMAL DESCRIPTION OF FIELDS:
11583 : NPoints number of points
11584 : Z array[NPoints-1,2], contains indexes of clusters
11585 : linked in pairs to form clustering tree. I-th row
11586 : corresponds to I-th merge:
11587 : * Z[I,0] - index of the first cluster to merge
11588 : * Z[I,1] - index of the second cluster to merge
11589 : * Z[I,0]<Z[I,1]
11590 : * clusters are numbered from 0 to 2*NPoints-2, with
11591 : indexes from 0 to NPoints-1 corresponding to points
11592 : of the original dataset, and indexes from NPoints to
11593 : 2*NPoints-2 correspond to clusters generated by
11594 : subsequent merges (I-th row of Z creates cluster
11595 : with index NPoints+I).
11596 :
11597 : IMPORTANT: indexes in Z[] are indexes in the ORIGINAL,
11598 : unsorted dataset. In addition to Z algorithm outputs
11599 : permutation which rearranges points in such a way that
11600 : subsequent merges are performed on adjacent points
11601 : (such order is needed if you want to build dendrogram).
11602 : However, indexes in Z are related to original,
11603 : unrearranged sequence of points.
11604 :
11605 : P array[NPoints], permutation which reorders points for
11606 : dendrogram construction. P[i] contains index of the
11607 : position where we should move I-th point of the
11608 : original dataset in order to apply merges PZ/PM.
11609 :
11610 : PZ same as Z, but for permutation of points given by P.
11611 : The only thing which changes is the indexes of the
11612 : original points; indexes of clusters remain the same.
11613 :
11614 : MergeDist array[NPoints-1], contains distances between clusters
11615 : being merged (MergeDist[i] corresponds to the merge stored
11616 : in Z[i,...]):
11617 : * CLINK, SLINK and average linkage algorithms report
11618 : "raw", unmodified distance metric.
11619 : * Ward's method reports weighted intra-cluster
11620 : variance, which is equal to ||Ca-Cb||^2 * Sa*Sb/(Sa+Sb).
11621 : Here A and B are clusters being merged, Ca is a
11622 : center of A, Cb is a center of B, Sa is a size of A,
11623 : Sb is a size of B.
11624 :
11625 : PM array[NPoints-1,6], another representation of merges,
11626 : which is suited for dendrogram construction. It deals
11627 : with rearranged points (permutation P is applied) and
11628 : represents merges in a form which is different from the
11629 : one used by Z.
11630 : For each I from 0 to NPoints-2, I-th row of PM represents
11631 : merge performed on two clusters C0 and C1. Here:
11632 : * C0 contains points with indexes PM[I,0]...PM[I,1]
11633 : * C1 contains points with indexes PM[I,2]...PM[I,3]
11634 : * indexes stored in PM are given for dataset sorted
11635 : according to permutation P
11636 : * PM[I,1]=PM[I,2]-1 (only adjacent clusters are merged)
11637 : * PM[I,0]<=PM[I,1], PM[I,2]<=PM[I,3], i.e. both
11638 : clusters contain at least one point
11639 : * heights of "subdendrograms" corresponding to C0/C1
11640 : are stored in PM[I,4] and PM[I,5]. Subdendrograms
11641 : corresponding to single-point clusters have
11642 : height=0. Dendrogram of the merge result has height
11643 : H=max(H0,H1)+1.
11644 :
11645 : NOTE: there is one-to-one correspondence between merges described by Z and
11646 : PM. I-th row of Z describes same merge of clusters as I-th row of PM,
11647 : with "left" cluster from Z corresponding to the "left" one from PM.
11648 :
11649 : -- ALGLIB --
11650 : Copyright 10.07.2012 by Bochkanov Sergey
11651 : *************************************************************************/
11652 0 : _ahcreport_owner::_ahcreport_owner()
11653 : {
11654 : jmp_buf _break_jump;
11655 : alglib_impl::ae_state _state;
11656 :
11657 0 : alglib_impl::ae_state_init(&_state);
11658 0 : if( setjmp(_break_jump) )
11659 : {
11660 0 : if( p_struct!=NULL )
11661 : {
11662 0 : alglib_impl::_ahcreport_destroy(p_struct);
11663 0 : alglib_impl::ae_free(p_struct);
11664 : }
11665 0 : p_struct = NULL;
11666 : #if !defined(AE_NO_EXCEPTIONS)
11667 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11668 : #else
11669 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11670 : return;
11671 : #endif
11672 : }
11673 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11674 0 : p_struct = NULL;
11675 0 : p_struct = (alglib_impl::ahcreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::ahcreport), &_state);
11676 0 : memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
11677 0 : alglib_impl::_ahcreport_init(p_struct, &_state, ae_false);
11678 0 : ae_state_clear(&_state);
11679 0 : }
11680 :
11681 0 : _ahcreport_owner::_ahcreport_owner(const _ahcreport_owner &rhs)
11682 : {
11683 : jmp_buf _break_jump;
11684 : alglib_impl::ae_state _state;
11685 :
11686 0 : alglib_impl::ae_state_init(&_state);
11687 0 : if( setjmp(_break_jump) )
11688 : {
11689 0 : if( p_struct!=NULL )
11690 : {
11691 0 : alglib_impl::_ahcreport_destroy(p_struct);
11692 0 : alglib_impl::ae_free(p_struct);
11693 : }
11694 0 : p_struct = NULL;
11695 : #if !defined(AE_NO_EXCEPTIONS)
11696 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11697 : #else
11698 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11699 : return;
11700 : #endif
11701 : }
11702 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11703 0 : p_struct = NULL;
11704 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ahcreport copy constructor failure (source is not initialized)", &_state);
11705 0 : p_struct = (alglib_impl::ahcreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::ahcreport), &_state);
11706 0 : memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
11707 0 : alglib_impl::_ahcreport_init_copy(p_struct, const_cast<alglib_impl::ahcreport*>(rhs.p_struct), &_state, ae_false);
11708 0 : ae_state_clear(&_state);
11709 0 : }
11710 :
11711 0 : _ahcreport_owner& _ahcreport_owner::operator=(const _ahcreport_owner &rhs)
11712 : {
11713 0 : if( this==&rhs )
11714 0 : return *this;
11715 : jmp_buf _break_jump;
11716 : alglib_impl::ae_state _state;
11717 :
11718 0 : alglib_impl::ae_state_init(&_state);
11719 0 : if( setjmp(_break_jump) )
11720 : {
11721 : #if !defined(AE_NO_EXCEPTIONS)
11722 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11723 : #else
11724 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11725 : return *this;
11726 : #endif
11727 : }
11728 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11729 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: ahcreport assignment constructor failure (destination is not initialized)", &_state);
11730 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: ahcreport assignment constructor failure (source is not initialized)", &_state);
11731 0 : alglib_impl::_ahcreport_destroy(p_struct);
11732 0 : memset(p_struct, 0, sizeof(alglib_impl::ahcreport));
11733 0 : alglib_impl::_ahcreport_init_copy(p_struct, const_cast<alglib_impl::ahcreport*>(rhs.p_struct), &_state, ae_false);
11734 0 : ae_state_clear(&_state);
11735 0 : return *this;
11736 : }
11737 :
11738 0 : _ahcreport_owner::~_ahcreport_owner()
11739 : {
11740 0 : if( p_struct!=NULL )
11741 : {
11742 0 : alglib_impl::_ahcreport_destroy(p_struct);
11743 0 : ae_free(p_struct);
11744 : }
11745 0 : }
11746 :
11747 0 : alglib_impl::ahcreport* _ahcreport_owner::c_ptr()
11748 : {
11749 0 : return p_struct;
11750 : }
11751 :
11752 0 : alglib_impl::ahcreport* _ahcreport_owner::c_ptr() const
11753 : {
11754 0 : return const_cast<alglib_impl::ahcreport*>(p_struct);
11755 : }
11756 0 : ahcreport::ahcreport() : _ahcreport_owner() ,terminationtype(p_struct->terminationtype),npoints(p_struct->npoints),p(&p_struct->p),z(&p_struct->z),pz(&p_struct->pz),pm(&p_struct->pm),mergedist(&p_struct->mergedist)
11757 : {
11758 0 : }
11759 :
11760 0 : ahcreport::ahcreport(const ahcreport &rhs):_ahcreport_owner(rhs) ,terminationtype(p_struct->terminationtype),npoints(p_struct->npoints),p(&p_struct->p),z(&p_struct->z),pz(&p_struct->pz),pm(&p_struct->pm),mergedist(&p_struct->mergedist)
11761 : {
11762 0 : }
11763 :
11764 0 : ahcreport& ahcreport::operator=(const ahcreport &rhs)
11765 : {
11766 0 : if( this==&rhs )
11767 0 : return *this;
11768 0 : _ahcreport_owner::operator=(rhs);
11769 0 : return *this;
11770 : }
11771 :
11772 0 : ahcreport::~ahcreport()
11773 : {
11774 0 : }
11775 :
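/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): reading
the merge information stored in an AHCReport. The dataset values are
hypothetical; the calls used here are the clustering functions defined
later in this file. Per the Z conventions documented above, indexes in
rep.z refer to the original point order, with larger indexes denoting
clusters created by earlier merges.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1],[1.1],[4]]";    // hypothetical 1-D dataset
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);          // 2 = Euclidean distance
    alglib::clusterizerrunahc(s, rep);
    // For NPoints=3 there are NPoints-1=2 merges; row I of rep.z names the
    // two clusters merged at step I, and rep.mergedist[I] is the distance.
    for(int i=0; i<rep.npoints-1; i++)
        printf("merge %d: clusters %d and %d, dist=%.3f\n",
               i, (int)rep.z[i][0], (int)rep.z[i][1], (double)rep.mergedist[i]);
*************************************************************************/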
11776 :
11777 : /*************************************************************************
11778 : This structure is used to store results of the k-means clustering
11779 : algorithm.
11780 :
11781 : The following information is always returned:
11782 : * NPoints contains number of points in the original dataset
11783 : * TerminationType contains completion code, negative on failure, positive
11784 : on success
11785 : * K contains number of clusters
11786 :
11787 : For positive TerminationType we return:
11788 : * NFeatures contains number of variables in the original dataset
11789 : * C, which contains centers found by algorithm
11790 : * CIdx, which maps points of the original dataset to clusters
11791 :
11792 : FORMAL DESCRIPTION OF FIELDS:
11793 : NPoints number of points, >=0
11794 : NFeatures number of variables, >=1
11795 : TerminationType completion code:
11796 : * -5 if distance type is anything different from
11797 : Euclidean metric
11798 : * -3 for a degenerate dataset: a) fewer than K distinct
11799 : points, b) K=0 for a non-empty dataset.
11800 : * +1 for successful completion
11801 : K number of clusters
11802 : C array[K,NFeatures], rows of the array store centers
11803 : CIdx array[NPoints], which contains cluster indexes
11804 : IterationsCount actual number of iterations performed by clusterizer.
11805 : If the algorithm performed more than one random restart,
11806 : the total number of iterations across all restarts is returned.
11807 : Energy merit function, "energy", sum of squared deviations
11808 : from cluster centers
11809 :
11810 : -- ALGLIB --
11811 : Copyright 27.11.2012 by Bochkanov Sergey
11812 : *************************************************************************/
11813 0 : _kmeansreport_owner::_kmeansreport_owner()
11814 : {
11815 : jmp_buf _break_jump;
11816 : alglib_impl::ae_state _state;
11817 :
11818 0 : alglib_impl::ae_state_init(&_state);
11819 0 : if( setjmp(_break_jump) )
11820 : {
11821 0 : if( p_struct!=NULL )
11822 : {
11823 0 : alglib_impl::_kmeansreport_destroy(p_struct);
11824 0 : alglib_impl::ae_free(p_struct);
11825 : }
11826 0 : p_struct = NULL;
11827 : #if !defined(AE_NO_EXCEPTIONS)
11828 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11829 : #else
11830 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11831 : return;
11832 : #endif
11833 : }
11834 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11835 0 : p_struct = NULL;
11836 0 : p_struct = (alglib_impl::kmeansreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::kmeansreport), &_state);
11837 0 : memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
11838 0 : alglib_impl::_kmeansreport_init(p_struct, &_state, ae_false);
11839 0 : ae_state_clear(&_state);
11840 0 : }
11841 :
11842 0 : _kmeansreport_owner::_kmeansreport_owner(const _kmeansreport_owner &rhs)
11843 : {
11844 : jmp_buf _break_jump;
11845 : alglib_impl::ae_state _state;
11846 :
11847 0 : alglib_impl::ae_state_init(&_state);
11848 0 : if( setjmp(_break_jump) )
11849 : {
11850 0 : if( p_struct!=NULL )
11851 : {
11852 0 : alglib_impl::_kmeansreport_destroy(p_struct);
11853 0 : alglib_impl::ae_free(p_struct);
11854 : }
11855 0 : p_struct = NULL;
11856 : #if !defined(AE_NO_EXCEPTIONS)
11857 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11858 : #else
11859 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11860 : return;
11861 : #endif
11862 : }
11863 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11864 0 : p_struct = NULL;
11865 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: kmeansreport copy constructor failure (source is not initialized)", &_state);
11866 0 : p_struct = (alglib_impl::kmeansreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::kmeansreport), &_state);
11867 0 : memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
11868 0 : alglib_impl::_kmeansreport_init_copy(p_struct, const_cast<alglib_impl::kmeansreport*>(rhs.p_struct), &_state, ae_false);
11869 0 : ae_state_clear(&_state);
11870 0 : }
11871 :
11872 0 : _kmeansreport_owner& _kmeansreport_owner::operator=(const _kmeansreport_owner &rhs)
11873 : {
11874 0 : if( this==&rhs )
11875 0 : return *this;
11876 : jmp_buf _break_jump;
11877 : alglib_impl::ae_state _state;
11878 :
11879 0 : alglib_impl::ae_state_init(&_state);
11880 0 : if( setjmp(_break_jump) )
11881 : {
11882 : #if !defined(AE_NO_EXCEPTIONS)
11883 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
11884 : #else
11885 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
11886 : return *this;
11887 : #endif
11888 : }
11889 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
11890 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: kmeansreport assignment constructor failure (destination is not initialized)", &_state);
11891 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: kmeansreport assignment constructor failure (source is not initialized)", &_state);
11892 0 : alglib_impl::_kmeansreport_destroy(p_struct);
11893 0 : memset(p_struct, 0, sizeof(alglib_impl::kmeansreport));
11894 0 : alglib_impl::_kmeansreport_init_copy(p_struct, const_cast<alglib_impl::kmeansreport*>(rhs.p_struct), &_state, ae_false);
11895 0 : ae_state_clear(&_state);
11896 0 : return *this;
11897 : }
11898 :
11899 0 : _kmeansreport_owner::~_kmeansreport_owner()
11900 : {
11901 0 : if( p_struct!=NULL )
11902 : {
11903 0 : alglib_impl::_kmeansreport_destroy(p_struct);
11904 0 : ae_free(p_struct);
11905 : }
11906 0 : }
11907 :
11908 0 : alglib_impl::kmeansreport* _kmeansreport_owner::c_ptr()
11909 : {
11910 0 : return p_struct;
11911 : }
11912 :
11913 0 : alglib_impl::kmeansreport* _kmeansreport_owner::c_ptr() const
11914 : {
11915 0 : return const_cast<alglib_impl::kmeansreport*>(p_struct);
11916 : }
11917 0 : kmeansreport::kmeansreport() : _kmeansreport_owner() ,npoints(p_struct->npoints),nfeatures(p_struct->nfeatures),terminationtype(p_struct->terminationtype),iterationscount(p_struct->iterationscount),energy(p_struct->energy),k(p_struct->k),c(&p_struct->c),cidx(&p_struct->cidx)
11918 : {
11919 0 : }
11920 :
11921 0 : kmeansreport::kmeansreport(const kmeansreport &rhs):_kmeansreport_owner(rhs) ,npoints(p_struct->npoints),nfeatures(p_struct->nfeatures),terminationtype(p_struct->terminationtype),iterationscount(p_struct->iterationscount),energy(p_struct->energy),k(p_struct->k),c(&p_struct->c),cidx(&p_struct->cidx)
11922 : {
11923 0 : }
11924 :
11925 0 : kmeansreport& kmeansreport::operator=(const kmeansreport &rhs)
11926 : {
11927 0 : if( this==&rhs )
11928 0 : return *this;
11929 0 : _kmeansreport_owner::operator=(rhs);
11930 0 : return *this;
11931 : }
11932 :
11933 0 : kmeansreport::~kmeansreport()
11934 : {
11935 0 : }
11936 :
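/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
inspecting a KMeansReport after a run. Field names follow the accessor
list above; the report is assumed to have been filled by
clusterizerrunkmeans(), defined later in this file.

    alglib::kmeansreport rep;
    // ... rep filled by clusterizerrunkmeans(s, k, rep) ...
    if( rep.terminationtype>0 )
    {
        // rep.c is array[K,NFeatures]: row J holds the center of cluster J;
        // rep.cidx maps every input point to its cluster index.
        for(int j=0; j<rep.k; j++)
            printf("center %d, first coordinate: %.3f\n", j, (double)rep.c[j][0]);
    }
*************************************************************************/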
11937 : /*************************************************************************
11938 : This function initializes a clusterizer object. A newly initialized object
11939 : is empty, i.e. it does not contain a dataset. You should use it as follows:
11940 : 1. creation
11941 : 2. dataset is added with ClusterizerSetPoints()
11942 : 3. additional parameters are set
11943 : 4. clusterization is performed with one of the clustering functions (a minimal sketch follows this function)
11944 :
11945 : -- ALGLIB --
11946 : Copyright 10.07.2012 by Bochkanov Sergey
11947 : *************************************************************************/
11948 0 : void clusterizercreate(clusterizerstate &s, const xparams _xparams)
11949 : {
11950 : jmp_buf _break_jump;
11951 : alglib_impl::ae_state _alglib_env_state;
11952 0 : alglib_impl::ae_state_init(&_alglib_env_state);
11953 0 : if( setjmp(_break_jump) )
11954 : {
11955 : #if !defined(AE_NO_EXCEPTIONS)
11956 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
11957 : #else
11958 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
11959 : return;
11960 : #endif
11961 : }
11962 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
11963 0 : if( _xparams.flags!=0x0 )
11964 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
11965 0 : alglib_impl::clusterizercreate(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), &_alglib_env_state);
11966 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
11967 0 : return;
11968 : }
11969 :
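/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): the
four-step workflow described above, on a hypothetical 2-D dataset.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,0],[4,1]]";
    alglib::clusterizercreate(s);              // 1. creation
    alglib::clusterizersetpoints(s, xy, 2);    // 2. dataset, Euclidean metric
    alglib::clusterizersetahcalgo(s, 1);       // 3. parameters: single linkage
    alglib::clusterizerrunahc(s, rep);         // 4. clusterization
*************************************************************************/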
11970 : /*************************************************************************
11971 : This function adds a dataset to the clusterizer structure.
11972 :
11973 : This function overrides all previous calls of ClusterizerSetPoints() or
11974 : ClusterizerSetDistances().
11975 :
11976 : INPUT PARAMETERS:
11977 : S - clusterizer state, initialized by ClusterizerCreate()
11978 : XY - array[NPoints,NFeatures], dataset
11979 : NPoints - number of points, >=0
11980 : NFeatures- number of features, >=1
11981 : DistType- distance function:
11982 : * 0 Chebyshev distance (L-inf norm)
11983 : * 1 city block distance (L1 norm)
11984 : * 2 Euclidean distance (L2 norm), non-squared
11985 : * 10 Pearson correlation:
11986 : dist(a,b) = 1-corr(a,b)
11987 : * 11 Absolute Pearson correlation:
11988 : dist(a,b) = 1-|corr(a,b)|
11989 : * 12 Uncentered Pearson correlation (cosine of the angle):
11990 : dist(a,b) = a'*b/(|a|*|b|)
11991 : * 13 Absolute uncentered Pearson correlation
11992 : dist(a,b) = |a'*b|/(|a|*|b|)
11993 : * 20 Spearman rank correlation:
11994 : dist(a,b) = 1-rankcorr(a,b)
11995 : * 21 Absolute Spearman rank correlation
11996 : dist(a,b) = 1-|rankcorr(a,b)|
11997 :
11998 : NOTE 1: different distance functions have different performance penalties:
11999 : * Euclidean and Pearson correlation distances are the fastest ones
12000 : * the Spearman correlation distance function is a bit slower
12001 : * city block and Chebyshev distances are an order of magnitude slower
12002 :
12003 : The reason behind the difference in performance is that correlation-based
12004 : distance functions are computed using optimized linear algebra kernels,
12005 : while Chebyshev and city block distance functions are computed using
12006 : simple nested loops with two branches at each iteration.
12007 :
12008 : NOTE 2: different clustering algorithms have different limitations:
12009 : * agglomerative hierarchical clustering algorithms may be used with
12010 : any kind of distance metric
12011 : * k-means++ clustering algorithm may be used only with Euclidean
12012 : distance function
12013 : Thus, the list of specific clustering algorithms you may use depends
12014 : on the distance function you specify when you set your dataset.
12015 :
12016 : -- ALGLIB --
12017 : Copyright 10.07.2012 by Bochkanov Sergey
12018 : *************************************************************************/
12019 0 : void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, const xparams _xparams)
12020 : {
12021 : jmp_buf _break_jump;
12022 : alglib_impl::ae_state _alglib_env_state;
12023 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12024 0 : if( setjmp(_break_jump) )
12025 : {
12026 : #if !defined(AE_NO_EXCEPTIONS)
12027 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12028 : #else
12029 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12030 : return;
12031 : #endif
12032 : }
12033 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12034 0 : if( _xparams.flags!=0x0 )
12035 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12036 0 : alglib_impl::clusterizersetpoints(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, &_alglib_env_state);
12037 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12038 0 : return;
12039 : }
12040 :
12041 : /*************************************************************************
12042 : This function adds a dataset to the clusterizer structure.
12043 :
12044 : This function overrides all previous calls of ClusterizerSetPoints() or
12045 : ClusterizerSetDistances().
12046 :
12047 : INPUT PARAMETERS:
12048 : S - clusterizer state, initialized by ClusterizerCreate()
12049 : XY - array[NPoints,NFeatures], dataset
12050 : NPoints - number of points, >=0
12051 : NFeatures- number of features, >=1
12052 : DistType- distance function:
12053 : * 0 Chebyshev distance (L-inf norm)
12054 : * 1 city block distance (L1 norm)
12055 : * 2 Euclidean distance (L2 norm), non-squared
12056 : * 10 Pearson correlation:
12057 : dist(a,b) = 1-corr(a,b)
12058 : * 11 Absolute Pearson correlation:
12059 : dist(a,b) = 1-|corr(a,b)|
12060 : * 12 Uncentered Pearson correlation (cosine of the angle):
12061 : dist(a,b) = a'*b/(|a|*|b|)
12062 : * 13 Absolute uncentered Pearson correlation
12063 : dist(a,b) = |a'*b|/(|a|*|b|)
12064 : * 20 Spearman rank correlation:
12065 : dist(a,b) = 1-rankcorr(a,b)
12066 : * 21 Absolute Spearman rank correlation
12067 : dist(a,b) = 1-|rankcorr(a,b)|
12068 :
12069 : NOTE 1: different distance functions have different performance penalties:
12070 : * Euclidean and Pearson correlation distances are the fastest ones
12071 : * the Spearman correlation distance function is a bit slower
12072 : * city block and Chebyshev distances are an order of magnitude slower
12073 :
12074 : The reason behind the difference in performance is that correlation-based
12075 : distance functions are computed using optimized linear algebra kernels,
12076 : while Chebyshev and city block distance functions are computed using
12077 : simple nested loops with two branches at each iteration.
12078 :
12079 : NOTE 2: different clustering algorithms have different limitations:
12080 : * agglomerative hierarchical clustering algorithms may be used with
12081 : any kind of distance metric
12082 : * k-means++ clustering algorithm may be used only with Euclidean
12083 : distance function
12084 : Thus, the list of specific clustering algorithms you may use depends
12085 : on the distance function you specify when you set your dataset.
12086 :
12087 : -- ALGLIB --
12088 : Copyright 10.07.2012 by Bochkanov Sergey
12089 : *************************************************************************/
12090 : #if !defined(AE_NO_EXCEPTIONS)
12091 0 : void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t disttype, const xparams _xparams)
12092 : {
12093 : jmp_buf _break_jump;
12094 : alglib_impl::ae_state _alglib_env_state;
12095 : ae_int_t npoints;
12096 : ae_int_t nfeatures;
12097 :
12098 0 : npoints = xy.rows();
12099 0 : nfeatures = xy.cols();
12100 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12101 0 : if( setjmp(_break_jump) )
12102 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12103 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12104 0 : if( _xparams.flags!=0x0 )
12105 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12106 0 : alglib_impl::clusterizersetpoints(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, &_alglib_env_state);
12107 :
12108 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12109 0 : return;
12110 : }
12111 : #endif
12112 :
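/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
attaching a dataset with a non-default distance function. DistType=10
(Pearson correlation) is used here; the values are hypothetical.

    alglib::clusterizerstate s;
    alglib::real_2d_array xy = "[[1,2,3],[3,2,1],[1,3,2]]";
    alglib::clusterizercreate(s);
    // long form: pass NPoints and NFeatures explicitly ...
    alglib::clusterizersetpoints(s, xy, 3, 3, 10);
    // ... or use the shorter overload, which reads them from the matrix:
    alglib::clusterizersetpoints(s, xy, 10);
*************************************************************************/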
12113 : /*************************************************************************
12114 : This function adds a dataset given by its distance matrix to the
12115 : clusterizer structure. Note that the dataset is not given explicitly -
12116 : only the distance matrix is given.
12117 :
12118 : This function overrides all previous calls of ClusterizerSetPoints() or
12119 : ClusterizerSetDistances().
12120 :
12121 : INPUT PARAMETERS:
12122 : S - clusterizer state, initialized by ClusterizerCreate()
12123 : D - array[NPoints,NPoints], distance matrix given by its upper
12124 : or lower triangle (main diagonal is ignored because its
12125 : entries are expected to be zero).
12126 : NPoints - number of points
12127 : IsUpper - whether upper or lower triangle of D is given.
12128 :
12129 : NOTE 1: different clustering algorithms have different limitations:
12130 : * agglomerative hierarchical clustering algorithms may be used with
12131 : any kind of distance metric, including one which is given by
12132 : distance matrix
12133 : * k-means++ clustering algorithm may be used only with Euclidean
12134 : distance function and explicitly given points - it cannot be
12135 : used with a dataset given by a distance matrix
12136 : Thus, if you call this function, you will be unable to use k-means
12137 : clustering algorithm to process your problem.
12138 :
12139 : -- ALGLIB --
12140 : Copyright 10.07.2012 by Bochkanov Sergey
12141 : *************************************************************************/
12142 0 : void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const ae_int_t npoints, const bool isupper, const xparams _xparams)
12143 : {
12144 : jmp_buf _break_jump;
12145 : alglib_impl::ae_state _alglib_env_state;
12146 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12147 0 : if( setjmp(_break_jump) )
12148 : {
12149 : #if !defined(AE_NO_EXCEPTIONS)
12150 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12151 : #else
12152 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12153 : return;
12154 : #endif
12155 : }
12156 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12157 0 : if( _xparams.flags!=0x0 )
12158 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12159 0 : alglib_impl::clusterizersetdistances(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), npoints, isupper, &_alglib_env_state);
12160 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12161 0 : return;
12162 : }
12163 :
12164 : /*************************************************************************
12165 : This function adds a dataset given by its distance matrix to the
12166 : clusterizer structure. Note that the dataset is not given explicitly -
12167 : only the distance matrix is given.
12168 :
12169 : This function overrides all previous calls of ClusterizerSetPoints() or
12170 : ClusterizerSetDistances().
12171 :
12172 : INPUT PARAMETERS:
12173 : S - clusterizer state, initialized by ClusterizerCreate()
12174 : D - array[NPoints,NPoints], distance matrix given by its upper
12175 : or lower triangle (main diagonal is ignored because its
12176 : entries are expected to be zero).
12177 : NPoints - number of points
12178 : IsUpper - whether upper or lower triangle of D is given.
12179 :
12180 : NOTE 1: different clustering algorithms have different limitations:
12181 : * agglomerative hierarchical clustering algorithms may be used with
12182 : any kind of distance metric, including one which is given by
12183 : distance matrix
12184 : * k-means++ clustering algorithm may be used only with Euclidean
12185 : distance function and explicitly given points - it cannot be
12186 : used with a dataset given by a distance matrix
12187 : Thus, if you call this function, you will be unable to use k-means
12188 : clustering algorithm to process your problem.
12189 :
12190 : -- ALGLIB --
12191 : Copyright 10.07.2012 by Bochkanov Sergey
12192 : *************************************************************************/
12193 : #if !defined(AE_NO_EXCEPTIONS)
12194 0 : void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const bool isupper, const xparams _xparams)
12195 : {
12196 : jmp_buf _break_jump;
12197 : alglib_impl::ae_state _alglib_env_state;
12198 : ae_int_t npoints;
12199 0 : if( (d.rows()!=d.cols()))
12200 0 : _ALGLIB_CPP_EXCEPTION("Error while calling 'clusterizersetdistances': looks like one of arguments has wrong size");
12201 0 : npoints = d.rows();
12202 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12203 0 : if( setjmp(_break_jump) )
12204 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12205 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12206 0 : if( _xparams.flags!=0x0 )
12207 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12208 0 : alglib_impl::clusterizersetdistances(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), npoints, isupper, &_alglib_env_state);
12209 :
12210 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12211 0 : return;
12212 : }
12213 : #endif
12214 :
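/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
attaching a dataset by its distance matrix. Only the upper triangle of the
hypothetical matrix D is referenced because IsUpper=true; after this call
only AHC can be used, as noted above.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array d = "[[0,1,5],[0,0,4],[0,0,0]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetdistances(s, d, true);   // true = upper triangle
    alglib::clusterizerrunahc(s, rep);
*************************************************************************/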
12215 : /*************************************************************************
12216 : This function sets the agglomerative hierarchical clustering algorithm
12217 :
12218 : INPUT PARAMETERS:
12219 : S - clusterizer state, initialized by ClusterizerCreate()
12220 : Algo - algorithm type:
12221 : * 0 complete linkage (default algorithm)
12222 : * 1 single linkage
12223 : * 2 unweighted average linkage
12224 : * 3 weighted average linkage
12225 : * 4 Ward's method
12226 :
12227 : NOTE: Ward's method works correctly only with Euclidean distance, so the
12228 : algorithm will return a negative termination code (failure) for
12229 : any other distance type.
12230 :
12231 : It is possible, however, to use this method with a user-supplied
12232 : distance matrix. It is your responsibility to pass one which was
12233 : calculated with the Euclidean distance function.
12234 :
12235 : -- ALGLIB --
12236 : Copyright 10.07.2012 by Bochkanov Sergey
12237 : *************************************************************************/
12238 0 : void clusterizersetahcalgo(const clusterizerstate &s, const ae_int_t algo, const xparams _xparams)
12239 : {
12240 : jmp_buf _break_jump;
12241 : alglib_impl::ae_state _alglib_env_state;
12242 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12243 0 : if( setjmp(_break_jump) )
12244 : {
12245 : #if !defined(AE_NO_EXCEPTIONS)
12246 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12247 : #else
12248 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12249 : return;
12250 : #endif
12251 : }
12252 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12253 0 : if( _xparams.flags!=0x0 )
12254 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12255 0 : alglib_impl::clusterizersetahcalgo(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), algo, &_alglib_env_state);
12256 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12257 0 : return;
12258 : }
12259 :
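/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
selecting Ward's method (Algo=4). Per the note above, it should be
combined with the Euclidean distance function. Dataset values are
hypothetical.

    alglib::clusterizerstate s;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,0]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);   // 2 = Euclidean, required here
    alglib::clusterizersetahcalgo(s, 4);      // 4 = Ward's method
*************************************************************************/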
12260 : /*************************************************************************
12261 : This function sets k-means properties: number of restarts and maximum
12262 : number of iterations per one run.
12263 :
12264 : INPUT PARAMETERS:
12265 : S - clusterizer state, initialized by ClusterizerCreate()
12266 : Restarts- restarts count, >=1.
12267 : k-means++ algorithm performs several restarts and chooses
12268 : the best set of centers (the one with minimum squared distance).
12269 : MaxIts - maximum number of k-means iterations performed during one
12270 : run. >=0, a zero value means that the algorithm performs an
12271 : unlimited number of iterations.
12272 :
12273 : -- ALGLIB --
12274 : Copyright 10.07.2012 by Bochkanov Sergey
12275 : *************************************************************************/
12276 0 : void clusterizersetkmeanslimits(const clusterizerstate &s, const ae_int_t restarts, const ae_int_t maxits, const xparams _xparams)
12277 : {
12278 : jmp_buf _break_jump;
12279 : alglib_impl::ae_state _alglib_env_state;
12280 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12281 0 : if( setjmp(_break_jump) )
12282 : {
12283 : #if !defined(AE_NO_EXCEPTIONS)
12284 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12285 : #else
12286 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12287 : return;
12288 : #endif
12289 : }
12290 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12291 0 : if( _xparams.flags!=0x0 )
12292 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12293 0 : alglib_impl::clusterizersetkmeanslimits(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), restarts, maxits, &_alglib_env_state);
12294 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12295 0 : return;
12296 : }
12297 :
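/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): five
random restarts with at most 100 iterations per run; the parameter values
are arbitrary.

    alglib::clusterizerstate s;
    alglib::clusterizercreate(s);
    alglib::clusterizersetkmeanslimits(s, 5, 100);  // Restarts=5, MaxIts=100
*************************************************************************/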
12298 : /*************************************************************************
12299 : This function sets k-means initialization algorithm. Several different
12300 : algorithms can be chosen, including k-means++.
12301 :
12302 : INPUT PARAMETERS:
12303 : S - clusterizer state, initialized by ClusterizerCreate()
12304 : InitAlgo- initialization algorithm:
12305 : * 0 automatic selection (different versions of ALGLIB
12306 : may select different algorithms)
12307 : * 1 random initialization
12308 : * 2 k-means++ initialization (best quality of initial
12309 : centers, but long non-parallelizable initialization
12310 : phase with bad cache locality)
12311 : * 3 "fast-greedy" algorithm with efficient, easy to
12312 : parallelize initialization. Quality of initial centers
12313 : is somewhat worse than that of k-means++. This
12314 : algorithm is the default one in the current version of
12315 : ALGLIB.
12316 : *-1 "debug" algorithm which always selects first K rows
12317 : of dataset; this algorithm is used for debug purposes
12318 : only. Do not use it in production code!
12319 :
12320 : -- ALGLIB --
12321 : Copyright 21.01.2015 by Bochkanov Sergey
12322 : *************************************************************************/
12323 0 : void clusterizersetkmeansinit(const clusterizerstate &s, const ae_int_t initalgo, const xparams _xparams)
12324 : {
12325 : jmp_buf _break_jump;
12326 : alglib_impl::ae_state _alglib_env_state;
12327 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12328 0 : if( setjmp(_break_jump) )
12329 : {
12330 : #if !defined(AE_NO_EXCEPTIONS)
12331 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12332 : #else
12333 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12334 : return;
12335 : #endif
12336 : }
12337 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12338 0 : if( _xparams.flags!=0x0 )
12339 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12340 0 : alglib_impl::clusterizersetkmeansinit(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), initalgo, &_alglib_env_state);
12341 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12342 0 : return;
12343 : }
12344 :
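/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): forcing
k-means++ initialization (InitAlgo=2) instead of the automatic selection.

    alglib::clusterizerstate s;
    alglib::clusterizercreate(s);
    alglib::clusterizersetkmeansinit(s, 2);   // 2 = k-means++
*************************************************************************/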
12345 : /*************************************************************************
12346 : This function sets the seed which is used to initialize the internal RNG.
12347 : By default, a deterministic seed is used - the same for each run of the
12348 : clusterizer. If you specify a non-deterministic seed value, then some
12349 : algorithms which depend on random initialization (in the current version:
12350 : k-means) may return slightly different results after each run.
12351 :
12352 : INPUT PARAMETERS:
12353 : S - clusterizer state, initialized by ClusterizerCreate()
12354 : Seed - seed:
12355 : * positive values = use deterministic seed for each run of
12356 : algorithms which depend on random initialization
12357 : * zero or negative values = use non-deterministic seed
12358 :
12359 : -- ALGLIB --
12360 : Copyright 08.06.2017 by Bochkanov Sergey
12361 : *************************************************************************/
12362 0 : void clusterizersetseed(const clusterizerstate &s, const ae_int_t seed, const xparams _xparams)
12363 : {
12364 : jmp_buf _break_jump;
12365 : alglib_impl::ae_state _alglib_env_state;
12366 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12367 0 : if( setjmp(_break_jump) )
12368 : {
12369 : #if !defined(AE_NO_EXCEPTIONS)
12370 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12371 : #else
12372 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12373 : return;
12374 : #endif
12375 : }
12376 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12377 0 : if( _xparams.flags!=0x0 )
12378 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12379 0 : alglib_impl::clusterizersetseed(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), seed, &_alglib_env_state);
12380 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12381 0 : return;
12382 : }
12383 :
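/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): a
positive seed makes runs of seed-dependent algorithms (k-means)
reproducible; zero or a negative value requests a non-deterministic seed.
The seed value 12345 is arbitrary.

    alglib::clusterizerstate s;
    alglib::clusterizercreate(s);
    alglib::clusterizersetseed(s, 12345);   // deterministic runs
    alglib::clusterizersetseed(s, 0);       // non-deterministic runs
*************************************************************************/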
12384 : /*************************************************************************
12385 : This function performs agglomerative hierarchical clustering
12386 :
12387 : ! COMMERCIAL EDITION OF ALGLIB:
12388 : !
12389 : ! Commercial Edition of ALGLIB includes the following important improvements
12390 : ! of this function:
12391 : ! * high-performance native backend with same C# interface (C# version)
12392 : ! * multithreading support (C++ and C# versions)
12393 : ! * hardware vendor (Intel) implementations of linear algebra primitives
12394 : ! (C++ and C# versions, x86/x64 platform)
12395 : !
12396 : ! We recommend you to read 'Working with commercial version' section of
12397 : ! ALGLIB Reference Manual in order to find out how to use performance-
12398 : ! related features provided by commercial edition of ALGLIB.
12399 :
12400 : NOTE: Agglomerative hierarchical clustering algorithm has two phases:
12401 : distance matrix calculation and clustering itself. Only the first phase
12402 : (distance matrix calculation) is accelerated by Intel MKL and
12403 : multithreading. Thus, acceleration is significant only for medium or
12404 : high-dimensional problems.
12405 :
12406 : Although activating multithreading gives some speedup over single-
12407 : threaded execution, you should not expect nearly-linear scaling
12408 : with respect to cores count.
12409 :
12410 : INPUT PARAMETERS:
12411 : S - clusterizer state, initialized by ClusterizerCreate()
12412 :
12413 : OUTPUT PARAMETERS:
12414 : Rep - clustering results; see description of AHCReport
12415 : structure for more information.
12416 :
12417 : NOTE 1: hierarchical clustering algorithms require large amounts of memory.
12418 : In particular, this implementation needs sizeof(double)*NPoints^2
12419 : bytes to store the distance matrix. When we work with a
12420 : user-supplied matrix, this amount is doubled (we have to store the
12421 : original matrix and to work with its copy).
12422 :
12423 : For example, a problem with 10000 points would require 800MB of RAM
12424 : (8*10000^2 bytes), even when working in a 1-dimensional space.
12425 :
12426 : -- ALGLIB --
12427 : Copyright 10.07.2012 by Bochkanov Sergey
12428 : *************************************************************************/
12429 0 : void clusterizerrunahc(const clusterizerstate &s, ahcreport &rep, const xparams _xparams)
12430 : {
12431 : jmp_buf _break_jump;
12432 : alglib_impl::ae_state _alglib_env_state;
12433 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12434 0 : if( setjmp(_break_jump) )
12435 : {
12436 : #if !defined(AE_NO_EXCEPTIONS)
12437 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12438 : #else
12439 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12440 : return;
12441 : #endif
12442 : }
12443 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12444 0 : if( _xparams.flags!=0x0 )
12445 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12446 0 : alglib_impl::clusterizerrunahc(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), &_alglib_env_state);
12447 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12448 0 : return;
12449 : }
12450 :
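/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): a
complete AHC run on a hypothetical dataset, checking the completion code
before reading the dendrogram.

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,0],[4,1]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);          // Euclidean distance
    alglib::clusterizerrunahc(s, rep);
    if( rep.terminationtype>0 )
        printf("%s\n", rep.z.tostring().c_str());    // list of merges
*************************************************************************/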
12451 : /*************************************************************************
12452 : This function performs clustering by k-means++ algorithm.
12453 :
12454 : You may change algorithm properties by calling:
12455 : * ClusterizerSetKMeansLimits() to change number of restarts or iterations
12456 : * ClusterizerSetKMeansInit() to change initialization algorithm
12457 :
12458 : By default, one restart and an unlimited number of iterations are used.
12459 : The initialization algorithm is chosen automatically.
12460 :
12461 : ! COMMERCIAL EDITION OF ALGLIB:
12462 : !
12463 : ! Commercial Edition of ALGLIB includes the following important improvements
12464 : ! of this function:
12465 : ! * high-performance native backend with same C# interface (C# version)
12466 : ! * multithreading support (C++ and C# versions)
12467 : ! * hardware vendor (Intel) implementations of linear algebra primitives
12468 : ! (C++ and C# versions, x86/x64 platform)
12469 : !
12470 : ! We recommend you to read 'Working with commercial version' section of
12471 : ! ALGLIB Reference Manual in order to find out how to use performance-
12472 : ! related features provided by commercial edition of ALGLIB.
12473 :
12474 : NOTE: k-means clustering algorithm has two phases: selection of initial
12475 : centers and clustering itself. ALGLIB parallelizes both phases.
12476 : Parallel version is optimized for the following scenario: medium or
12477 : high-dimensional problem (8 or more dimensions) with large number of
12478 : points and clusters. However, some speed-up can be obtained even
12479 : when assumptions above are violated.
12480 :
12481 : INPUT PARAMETERS:
12482 : S - clusterizer state, initialized by ClusterizerCreate()
12483 : K - number of clusters, K>=0.
12484 : K can be zero only when algorithm is called for empty
12485 : dataset, in this case completion code is set to
12486 : success (+1).
12487 : If K=0 and the dataset size is non-zero, we cannot
12488 : meaningfully assign points to some center (there are no
12489 : centers because K=0) and return -3 as completion code
12490 : (failure).
12491 :
12492 : OUTPUT PARAMETERS:
12493 : Rep - clustering results; see description of KMeansReport
12494 : structure for more information.
12495 :
12496 : NOTE 1: k-means clustering can be performed only for datasets with the
12497 : Euclidean distance function. The algorithm will return a negative
12498 : completion code in Rep.TerminationType if the dataset was added
12499 : to the clusterizer with a DistType other than Euclidean (or was
12500 : specified by a distance matrix instead of explicitly given points).
12501 :
12502 : NOTE 2: k-means uses an internal RNG to select initial centers. If a
12503 : non-deterministic seed is in effect, each run of the algorithm may
12504 : return different values. If you need reproducible behavior across
12505 : runs, set a positive seed with the ClusterizerSetSeed() function.
12506 :
12507 : -- ALGLIB --
12508 : Copyright 10.07.2012 by Bochkanov Sergey
12509 : *************************************************************************/
12510 0 : void clusterizerrunkmeans(const clusterizerstate &s, const ae_int_t k, kmeansreport &rep, const xparams _xparams)
12511 : {
12512 : jmp_buf _break_jump;
12513 : alglib_impl::ae_state _alglib_env_state;
12514 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12515 0 : if( setjmp(_break_jump) )
12516 : {
12517 : #if !defined(AE_NO_EXCEPTIONS)
12518 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12519 : #else
12520 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12521 : return;
12522 : #endif
12523 : }
12524 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12525 0 : if( _xparams.flags!=0x0 )
12526 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12527 0 : alglib_impl::clusterizerrunkmeans(const_cast<alglib_impl::clusterizerstate*>(s.c_ptr()), k, const_cast<alglib_impl::kmeansreport*>(rep.c_ptr()), &_alglib_env_state);
12528 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12529 0 : return;
12530 : }
12531 :
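/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): k-means
with K=2 on a hypothetical dataset added with the Euclidean distance
function, which is the only DistType accepted by this algorithm.

    alglib::clusterizerstate s;
    alglib::kmeansreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,0],[4,1]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);          // 2 = Euclidean
    alglib::clusterizerrunkmeans(s, 2, rep);         // K=2
    if( rep.terminationtype>0 )
        printf("%s\n", rep.cidx.tostring().c_str()); // point-to-cluster map
*************************************************************************/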
12532 : /*************************************************************************
12533 : This function returns the distance matrix for a dataset
12534 :
12535 : ! COMMERCIAL EDITION OF ALGLIB:
12536 : !
12537 : ! Commercial Edition of ALGLIB includes the following important improvements
12538 : ! of this function:
12539 : ! * high-performance native backend with same C# interface (C# version)
12540 : ! * multithreading support (C++ and C# versions)
12541 : ! * hardware vendor (Intel) implementations of linear algebra primitives
12542 : ! (C++ and C# versions, x86/x64 platform)
12543 : !
12544 : ! We recommend you to read 'Working with commercial version' section of
12545 : ! ALGLIB Reference Manual in order to find out how to use performance-
12546 : ! related features provided by commercial edition of ALGLIB.
12547 :
12548 : INPUT PARAMETERS:
12549 : XY - array[NPoints,NFeatures], dataset
12550 : NPoints - number of points, >=0
12551 : NFeatures- number of features, >=1
12552 : DistType- distance function:
12553 : * 0 Chebyshev distance (L-inf norm)
12554 : * 1 city block distance (L1 norm)
12555 : * 2 Euclidean distance (L2 norm, non-squared)
12556 : * 10 Pearson correlation:
12557 : dist(a,b) = 1-corr(a,b)
12558 : * 11 Absolute Pearson correlation:
12559 : dist(a,b) = 1-|corr(a,b)|
12560 : * 12 Uncentered Pearson correlation (cosine of the angle):
12561 : dist(a,b) = a'*b/(|a|*|b|)
12562 : * 13 Absolute uncentered Pearson correlation
12563 : dist(a,b) = |a'*b|/(|a|*|b|)
12564 : * 20 Spearman rank correlation:
12565 : dist(a,b) = 1-rankcorr(a,b)
12566 : * 21 Absolute Spearman rank correlation
12567 : dist(a,b) = 1-|rankcorr(a,b)|
12568 :
12569 : OUTPUT PARAMETERS:
12570 : D - array[NPoints,NPoints], distance matrix
12571 : (full matrix is returned, with lower and upper triangles)
12572 :
12573 : NOTE: different distance functions have different performance penalties:
12574 : * Euclidean and Pearson correlation distances are the fastest ones
12575 : * the Spearman correlation distance function is a bit slower
12576 : * city block and Chebyshev distances are an order of magnitude slower
12577 :
12578 : The reason behind the difference in performance is that correlation-based
12579 : distance functions are computed using optimized linear algebra kernels,
12580 : while Chebyshev and city block distance functions are computed using
12581 : simple nested loops with two branches at each iteration.
12582 :
12583 : -- ALGLIB --
12584 : Copyright 10.07.2012 by Bochkanov Sergey
12585 : *************************************************************************/
12586 0 : void clusterizergetdistances(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, real_2d_array &d, const xparams _xparams)
12587 : {
12588 : jmp_buf _break_jump;
12589 : alglib_impl::ae_state _alglib_env_state;
12590 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12591 0 : if( setjmp(_break_jump) )
12592 : {
12593 : #if !defined(AE_NO_EXCEPTIONS)
12594 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12595 : #else
12596 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12597 : return;
12598 : #endif
12599 : }
12600 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12601 0 : if( _xparams.flags!=0x0 )
12602 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12603 0 : alglib_impl::clusterizergetdistances(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nfeatures, disttype, const_cast<alglib_impl::ae_matrix*>(d.c_ptr()), &_alglib_env_state);
12604 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12605 0 : return;
12606 : }
12607 :
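/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
computing the full distance matrix of a hypothetical dataset without
running any clustering.

    alglib::real_2d_array xy = "[[0,0],[3,4]]";
    alglib::real_2d_array d;
    alglib::clusterizergetdistances(xy, 2, 2, 2, d); // NPoints=2, NFeatures=2,
                                                     // DistType=2 (Euclidean)
    printf("%s\n", d.tostring(3).c_str());           // expected [[0,5],[5,0]]
*************************************************************************/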
12608 : /*************************************************************************
12609 : This function takes as input a clusterization report Rep and a desired
12610 : cluster count K, and builds the top K clusters from the hierarchical
12611 : clusterization tree. It returns the assignment of points to clusters (an array of cluster indexes).
12612 :
12613 : INPUT PARAMETERS:
12614 : Rep - report from ClusterizerRunAHC() performed on XY
12615 : K - desired number of clusters, 1<=K<=NPoints.
12616 : K can be zero only when NPoints=0.
12617 :
12618 : OUTPUT PARAMETERS:
12619 : CIdx - array[NPoints], I-th element contains cluster index (from
12620 : 0 to K-1) for I-th point of the dataset.
12621 : CZ - array[K]. This array allows you to convert cluster indexes
12622 : returned by this function to indexes used by Rep.Z. J-th
12623 : cluster returned by this function corresponds to CZ[J]-th
12624 : cluster stored in Rep.Z/PZ/PM.
12625 : It is guaranteed that CZ[I]<CZ[I+1].
12626 :
12627 : NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
12628 : Although they were obtained by manipulation with top K nodes of
12629 : dendrogram (i.e. hierarchical decomposition of dataset), this
12630 : function does not return information about hierarchy. Each of the
12631 : clusters stands on its own.
12632 :
12633 : NOTE: Cluster indexes returned by this function do not correspond to
12634 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
12635 : representation of the dataset (dendrogram), or you work with the "flat"
12636 : representation returned by this function. Each representation has its
12637 : own cluster indexing system (the former uses [0..2*NPoints-2], while
12638 : the latter uses [0..K-1]). It is possible, however, to convert from
12639 : one system to the other by means of the CZ array returned by this
12640 : function, which allows you to convert indexes stored in CIdx
12641 : to the numeration system used by Rep.Z.
12642 :
12643 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
12644 : it will perform many times faster than for K=100. Its worst-case
12645 : performance is O(N*K), although in the average case it performs better
12646 : (up to O(N*log(K))).
12647 :
12648 : -- ALGLIB --
12649 : Copyright 10.07.2012 by Bochkanov Sergey
12650 : *************************************************************************/
12651 0 : void clusterizergetkclusters(const ahcreport &rep, const ae_int_t k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
12652 : {
12653 : jmp_buf _break_jump;
12654 : alglib_impl::ae_state _alglib_env_state;
12655 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12656 0 : if( setjmp(_break_jump) )
12657 : {
12658 : #if !defined(AE_NO_EXCEPTIONS)
12659 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12660 : #else
12661 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12662 : return;
12663 : #endif
12664 : }
12665 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12666 0 : if( _xparams.flags!=0x0 )
12667 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12668 0 : alglib_impl::clusterizergetkclusters(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
12669 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12670 0 : return;
12671 : }
12672 :
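/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): cutting
the dendrogram produced by ClusterizerRunAHC() into K=2 flat clusters.

    alglib::ahcreport rep;                     // filled by clusterizerrunahc()
    alglib::integer_1d_array cidx, cz;
    alglib::clusterizergetkclusters(rep, 2, cidx, cz);
    // cidx[i] in [0,1] is the flat cluster of point i; cz maps those flat
    // indexes back to the dendrogram numeration used by rep.z.
*************************************************************************/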
12673 : /*************************************************************************
12674 : This function accepts an AHC report Rep and a desired minimum intercluster
12675 : distance, and returns the top clusters from the hierarchical clusterization
12676 : tree which are separated by distance R or HIGHER.
12677 :
12678 : It returns assignment of points to clusters (array of cluster indexes).
12679 :
12680 : There is one more function with a similar name - ClusterizerSeparatedByCorr,
12681 : which returns clusters with intercluster correlation equal to R or LOWER
12682 : (note: higher for distance, lower for correlation).
12683 :
12684 : INPUT PARAMETERS:
12685 : Rep - report from ClusterizerRunAHC() performed on XY
12686 : R - desired minimum intercluster distance, R>=0
12687 :
12688 : OUTPUT PARAMETERS:
12689 : K - number of clusters, 1<=K<=NPoints
12690 : CIdx - array[NPoints], I-th element contains cluster index (from
12691 : 0 to K-1) for I-th point of the dataset.
12692 : CZ - array[K]. This array allows you to convert cluster indexes
12693 : returned by this function to indexes used by Rep.Z. J-th
12694 : cluster returned by this function corresponds to CZ[J]-th
12695 : cluster stored in Rep.Z/PZ/PM.
12696 : It is guaranteed that CZ[I]<CZ[I+1].
12697 :
12698 : NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
12699 : Although they were obtained by manipulation with top K nodes of
12700 : dendrogram (i.e. hierarchical decomposition of dataset), this
12701 : function does not return information about hierarchy. Each of the
12702 : clusters stands on its own.
12703 :
12704 : NOTE: Cluster indexes returned by this function do not correspond to
12705 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
12706 : representation of the dataset (dendrogram), or you work with the "flat"
12707 : representation returned by this function. Each representation has its
12708 : own cluster indexing system (the former uses [0..2*NPoints-2], while
12709 : the latter uses [0..K-1]). It is possible, however, to convert from
12710 : one system to the other by means of the CZ array returned by this
12711 : function, which allows you to convert indexes stored in CIdx
12712 : to the numeration system used by Rep.Z.
12713 :
12714 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
12715 : it will perform many times faster than for K=100. Its worst-case
12716 : performance is O(N*K), although in the average case it performs better
12717 : (up to O(N*log(K))).
12718 :
12719 : -- ALGLIB --
12720 : Copyright 10.07.2012 by Bochkanov Sergey
12721 : *************************************************************************/
12722 0 : void clusterizerseparatedbydist(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
12723 : {
12724 : jmp_buf _break_jump;
12725 : alglib_impl::ae_state _alglib_env_state;
12726 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12727 0 : if( setjmp(_break_jump) )
12728 : {
12729 : #if !defined(AE_NO_EXCEPTIONS)
12730 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12731 : #else
12732 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12733 : return;
12734 : #endif
12735 : }
12736 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12737 0 : if( _xparams.flags!=0x0 )
12738 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12739 0 : alglib_impl::clusterizerseparatedbydist(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), r, &k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
12740 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12741 0 : return;
12742 : }
12743 :
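/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources):
extracting every cluster separated by an intercluster distance of at least
R=2.5 (an arbitrary threshold). Unlike in ClusterizerGetKClusters(), the
cluster count K is an output here.

    alglib::ahcreport rep;                     // filled by clusterizerrunahc()
    alglib::ae_int_t k;
    alglib::integer_1d_array cidx, cz;
    alglib::clusterizerseparatedbydist(rep, 2.5, k, cidx, cz);
*************************************************************************/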
12744 : /*************************************************************************
12745 : This function accepts an AHC report Rep and a desired maximum intercluster
12746 : correlation, and returns the top clusters from the hierarchical
12747 : clusterization tree which are separated by correlation R or LOWER.
12748 :
12749 : It returns assignment of points to clusters (array of cluster indexes).
12750 :
12751 : There is one more function with a similar name - ClusterizerSeparatedByDist,
12752 : which returns clusters with intercluster distance equal to R or HIGHER
12753 : (note: higher for distance, lower for correlation).
12754 :
12755 : INPUT PARAMETERS:
12756 : Rep - report from ClusterizerRunAHC() performed on XY
12757 : R - desired maximum intercluster correlation, -1<=R<=+1
12758 :
12759 : OUTPUT PARAMETERS:
12760 : K - number of clusters, 1<=K<=NPoints
12761 : CIdx - array[NPoints], I-th element contains cluster index (from
12762 : 0 to K-1) for I-th point of the dataset.
12763 : CZ - array[K]. This array allows you to convert cluster indexes
12764 : returned by this function to indexes used by Rep.Z. J-th
12765 : cluster returned by this function corresponds to CZ[J]-th
12766 : cluster stored in Rep.Z/PZ/PM.
12767 : It is guaranteed that CZ[I]<CZ[I+1].
12768 :
12769 : NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
12770 : Although they were obtained by manipulation with top K nodes of
12771 : dendrogram (i.e. hierarchical decomposition of dataset), this
12772 : function does not return information about hierarchy. Each of the
12773 : clusters stands on its own.
12774 :
12775 : NOTE: Cluster indexes returned by this function do not correspond to
12776 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
12777 : representation of the dataset (dendrogram), or you work with the "flat"
12778 : representation returned by this function. Each representation has its
12779 : own cluster indexing system (the former uses [0..2*NPoints-2], while
12780 : the latter uses [0..K-1]). It is possible, however, to convert from
12781 : one system to the other by means of the CZ array returned by this
12782 : function, which allows you to convert indexes stored in CIdx
12783 : to the numeration system used by Rep.Z.
12784 :
12785 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
12786 : it will perform many times faster than for K=100. Its worst-case
12787 : performance is O(N*K), although in the average case it performs better
12788 : (up to O(N*log(K))).
12789 :
12790 : -- ALGLIB --
12791 : Copyright 10.07.2012 by Bochkanov Sergey
12792 : *************************************************************************/
12793 0 : void clusterizerseparatedbycorr(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams)
12794 : {
12795 : jmp_buf _break_jump;
12796 : alglib_impl::ae_state _alglib_env_state;
12797 0 : alglib_impl::ae_state_init(&_alglib_env_state);
12798 0 : if( setjmp(_break_jump) )
12799 : {
12800 : #if !defined(AE_NO_EXCEPTIONS)
12801 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
12802 : #else
12803 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
12804 : return;
12805 : #endif
12806 : }
12807 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
12808 0 : if( _xparams.flags!=0x0 )
12809 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
12810 0 : alglib_impl::clusterizerseparatedbycorr(const_cast<alglib_impl::ahcreport*>(rep.c_ptr()), r, &k, const_cast<alglib_impl::ae_vector*>(cidx.c_ptr()), const_cast<alglib_impl::ae_vector*>(cz.c_ptr()), &_alglib_env_state);
12811 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
12812 0 : return;
12813 : }
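/*************************************************************************
EXAMPLE (minimal sketch, not part of the original ALGLIB sources): the
correlation-based counterpart of the call above. Clusters are kept only if
their intercluster correlation is R=0.5 (arbitrary) or lower, so the
dataset must have been added with a correlation-type DistType.

    alglib::ahcreport rep;                     // filled by clusterizerrunahc()
    alglib::ae_int_t k;
    alglib::integer_1d_array cidx, cz;
    alglib::clusterizerseparatedbycorr(rep, 0.5, k, cidx, cz);
*************************************************************************/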
12814 : #endif
12815 :
12816 : #if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
12817 : /*************************************************************************
12818 : A random forest (decision forest) builder object.
12819 :
12820 : Used to store a dataset and to specify decision forest training algorithm settings.
12821 : *************************************************************************/
12822 0 : _decisionforestbuilder_owner::_decisionforestbuilder_owner()
12823 : {
12824 : jmp_buf _break_jump;
12825 : alglib_impl::ae_state _state;
12826 :
12827 0 : alglib_impl::ae_state_init(&_state);
12828 0 : if( setjmp(_break_jump) )
12829 : {
12830 0 : if( p_struct!=NULL )
12831 : {
12832 0 : alglib_impl::_decisionforestbuilder_destroy(p_struct);
12833 0 : alglib_impl::ae_free(p_struct);
12834 : }
12835 0 : p_struct = NULL;
12836 : #if !defined(AE_NO_EXCEPTIONS)
12837 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
12838 : #else
12839 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
12840 : return;
12841 : #endif
12842 : }
12843 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
12844 0 : p_struct = NULL;
12845 0 : p_struct = (alglib_impl::decisionforestbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuilder), &_state);
12846 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
12847 0 : alglib_impl::_decisionforestbuilder_init(p_struct, &_state, ae_false);
12848 0 : ae_state_clear(&_state);
12849 0 : }
12850 :
12851 0 : _decisionforestbuilder_owner::_decisionforestbuilder_owner(const _decisionforestbuilder_owner &rhs)
12852 : {
12853 : jmp_buf _break_jump;
12854 : alglib_impl::ae_state _state;
12855 :
12856 0 : alglib_impl::ae_state_init(&_state);
12857 0 : if( setjmp(_break_jump) )
12858 : {
12859 0 : if( p_struct!=NULL )
12860 : {
12861 0 : alglib_impl::_decisionforestbuilder_destroy(p_struct);
12862 0 : alglib_impl::ae_free(p_struct);
12863 : }
12864 0 : p_struct = NULL;
12865 : #if !defined(AE_NO_EXCEPTIONS)
12866 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
12867 : #else
12868 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
12869 : return;
12870 : #endif
12871 : }
12872 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
12873 0 : p_struct = NULL;
12874 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuilder copy constructor failure (source is not initialized)", &_state);
12875 0 : p_struct = (alglib_impl::decisionforestbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuilder), &_state);
12876 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
12877 0 : alglib_impl::_decisionforestbuilder_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuilder*>(rhs.p_struct), &_state, ae_false);
12878 0 : ae_state_clear(&_state);
12879 0 : }
12880 :
12881 0 : _decisionforestbuilder_owner& _decisionforestbuilder_owner::operator=(const _decisionforestbuilder_owner &rhs)
12882 : {
12883 0 : if( this==&rhs )
12884 0 : return *this;
12885 : jmp_buf _break_jump;
12886 : alglib_impl::ae_state _state;
12887 :
12888 0 : alglib_impl::ae_state_init(&_state);
12889 0 : if( setjmp(_break_jump) )
12890 : {
12891 : #if !defined(AE_NO_EXCEPTIONS)
12892 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
12893 : #else
12894 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
12895 : return *this;
12896 : #endif
12897 : }
12898 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
12899 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforestbuilder assignment constructor failure (destination is not initialized)", &_state);
12900 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuilder assignment constructor failure (source is not initialized)", &_state);
12901 0 : alglib_impl::_decisionforestbuilder_destroy(p_struct);
12902 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuilder));
12903 0 : alglib_impl::_decisionforestbuilder_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuilder*>(rhs.p_struct), &_state, ae_false);
12904 0 : ae_state_clear(&_state);
12905 0 : return *this;
12906 : }
12907 :
12908 0 : _decisionforestbuilder_owner::~_decisionforestbuilder_owner()
12909 : {
12910 0 : if( p_struct!=NULL )
12911 : {
12912 0 : alglib_impl::_decisionforestbuilder_destroy(p_struct);
12913 0 : ae_free(p_struct);
12914 : }
12915 0 : }
12916 :
12917 0 : alglib_impl::decisionforestbuilder* _decisionforestbuilder_owner::c_ptr()
12918 : {
12919 0 : return p_struct;
12920 : }
12921 :
12922 0 : alglib_impl::decisionforestbuilder* _decisionforestbuilder_owner::c_ptr() const
12923 : {
12924 0 : return const_cast<alglib_impl::decisionforestbuilder*>(p_struct);
12925 : }
12926 0 : decisionforestbuilder::decisionforestbuilder() : _decisionforestbuilder_owner()
12927 : {
12928 0 : }
12929 :
12930 0 : decisionforestbuilder::decisionforestbuilder(const decisionforestbuilder &rhs):_decisionforestbuilder_owner(rhs)
12931 : {
12932 0 : }
12933 :
12934 0 : decisionforestbuilder& decisionforestbuilder::operator=(const decisionforestbuilder &rhs)
12935 : {
12936 0 : if( this==&rhs )
12937 0 : return *this;
12938 0 : _decisionforestbuilder_owner::operator=(rhs);
12939 0 : return *this;
12940 : }
12941 :
12942 0 : decisionforestbuilder::~decisionforestbuilder()
12943 : {
12944 0 : }
12945 :
12946 :
12947 : /*************************************************************************
12948 : Buffer object which is used to perform various requests (usually model
12949 : inference) in multithreaded mode (multiple threads working with the same
12950 : DF object).
12951 :
12952 : This object should be created with DFCreateBuffer().
12953 : *************************************************************************/
12954 0 : _decisionforestbuffer_owner::_decisionforestbuffer_owner()
12955 : {
12956 : jmp_buf _break_jump;
12957 : alglib_impl::ae_state _state;
12958 :
12959 0 : alglib_impl::ae_state_init(&_state);
12960 0 : if( setjmp(_break_jump) )
12961 : {
12962 0 : if( p_struct!=NULL )
12963 : {
12964 0 : alglib_impl::_decisionforestbuffer_destroy(p_struct);
12965 0 : alglib_impl::ae_free(p_struct);
12966 : }
12967 0 : p_struct = NULL;
12968 : #if !defined(AE_NO_EXCEPTIONS)
12969 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
12970 : #else
12971 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
12972 : return;
12973 : #endif
12974 : }
12975 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
12976 0 : p_struct = NULL;
12977 0 : p_struct = (alglib_impl::decisionforestbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuffer), &_state);
12978 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
12979 0 : alglib_impl::_decisionforestbuffer_init(p_struct, &_state, ae_false);
12980 0 : ae_state_clear(&_state);
12981 0 : }
12982 :
12983 0 : _decisionforestbuffer_owner::_decisionforestbuffer_owner(const _decisionforestbuffer_owner &rhs)
12984 : {
12985 : jmp_buf _break_jump;
12986 : alglib_impl::ae_state _state;
12987 :
12988 0 : alglib_impl::ae_state_init(&_state);
12989 0 : if( setjmp(_break_jump) )
12990 : {
12991 0 : if( p_struct!=NULL )
12992 : {
12993 0 : alglib_impl::_decisionforestbuffer_destroy(p_struct);
12994 0 : alglib_impl::ae_free(p_struct);
12995 : }
12996 0 : p_struct = NULL;
12997 : #if !defined(AE_NO_EXCEPTIONS)
12998 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
12999 : #else
13000 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13001 : return;
13002 : #endif
13003 : }
13004 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13005 0 : p_struct = NULL;
13006 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuffer copy constructor failure (source is not initialized)", &_state);
13007 0 : p_struct = (alglib_impl::decisionforestbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforestbuffer), &_state);
13008 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
13009 0 : alglib_impl::_decisionforestbuffer_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuffer*>(rhs.p_struct), &_state, ae_false);
13010 0 : ae_state_clear(&_state);
13011 0 : }
13012 :
13013 0 : _decisionforestbuffer_owner& _decisionforestbuffer_owner::operator=(const _decisionforestbuffer_owner &rhs)
13014 : {
13015 0 : if( this==&rhs )
13016 0 : return *this;
13017 : jmp_buf _break_jump;
13018 : alglib_impl::ae_state _state;
13019 :
13020 0 : alglib_impl::ae_state_init(&_state);
13021 0 : if( setjmp(_break_jump) )
13022 : {
13023 : #if !defined(AE_NO_EXCEPTIONS)
13024 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13025 : #else
13026 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13027 : return *this;
13028 : #endif
13029 : }
13030 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13031 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforestbuffer assignment constructor failure (destination is not initialized)", &_state);
13032 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforestbuffer assignment constructor failure (source is not initialized)", &_state);
13033 0 : alglib_impl::_decisionforestbuffer_destroy(p_struct);
13034 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforestbuffer));
13035 0 : alglib_impl::_decisionforestbuffer_init_copy(p_struct, const_cast<alglib_impl::decisionforestbuffer*>(rhs.p_struct), &_state, ae_false);
13036 0 : ae_state_clear(&_state);
13037 0 : return *this;
13038 : }
13039 :
13040 0 : _decisionforestbuffer_owner::~_decisionforestbuffer_owner()
13041 : {
13042 0 : if( p_struct!=NULL )
13043 : {
13044 0 : alglib_impl::_decisionforestbuffer_destroy(p_struct);
13045 0 : ae_free(p_struct);
13046 : }
13047 0 : }
13048 :
13049 0 : alglib_impl::decisionforestbuffer* _decisionforestbuffer_owner::c_ptr()
13050 : {
13051 0 : return p_struct;
13052 : }
13053 :
13054 0 : alglib_impl::decisionforestbuffer* _decisionforestbuffer_owner::c_ptr() const
13055 : {
13056 0 : return const_cast<alglib_impl::decisionforestbuffer*>(p_struct);
13057 : }
13058 0 : decisionforestbuffer::decisionforestbuffer() : _decisionforestbuffer_owner()
13059 : {
13060 0 : }
13061 :
13062 0 : decisionforestbuffer::decisionforestbuffer(const decisionforestbuffer &rhs):_decisionforestbuffer_owner(rhs)
13063 : {
13064 0 : }
13065 :
13066 0 : decisionforestbuffer& decisionforestbuffer::operator=(const decisionforestbuffer &rhs)
13067 : {
13068 0 : if( this==&rhs )
13069 0 : return *this;
13070 0 : _decisionforestbuffer_owner::operator=(rhs);
13071 0 : return *this;
13072 : }
13073 :
13074 0 : decisionforestbuffer::~decisionforestbuffer()
13075 : {
13076 0 : }
13077 :
13078 :
13079 : /*************************************************************************
13080 : Decision forest (random forest) model.
13081 : *************************************************************************/
13082 0 : _decisionforest_owner::_decisionforest_owner()
13083 : {
13084 : jmp_buf _break_jump;
13085 : alglib_impl::ae_state _state;
13086 :
13087 0 : alglib_impl::ae_state_init(&_state);
13088 0 : if( setjmp(_break_jump) )
13089 : {
13090 0 : if( p_struct!=NULL )
13091 : {
13092 0 : alglib_impl::_decisionforest_destroy(p_struct);
13093 0 : alglib_impl::ae_free(p_struct);
13094 : }
13095 0 : p_struct = NULL;
13096 : #if !defined(AE_NO_EXCEPTIONS)
13097 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13098 : #else
13099 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13100 : return;
13101 : #endif
13102 : }
13103 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13104 0 : p_struct = NULL;
13105 0 : p_struct = (alglib_impl::decisionforest*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforest), &_state);
13106 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
13107 0 : alglib_impl::_decisionforest_init(p_struct, &_state, ae_false);
13108 0 : ae_state_clear(&_state);
13109 0 : }
13110 :
13111 0 : _decisionforest_owner::_decisionforest_owner(const _decisionforest_owner &rhs)
13112 : {
13113 : jmp_buf _break_jump;
13114 : alglib_impl::ae_state _state;
13115 :
13116 0 : alglib_impl::ae_state_init(&_state);
13117 0 : if( setjmp(_break_jump) )
13118 : {
13119 0 : if( p_struct!=NULL )
13120 : {
13121 0 : alglib_impl::_decisionforest_destroy(p_struct);
13122 0 : alglib_impl::ae_free(p_struct);
13123 : }
13124 0 : p_struct = NULL;
13125 : #if !defined(AE_NO_EXCEPTIONS)
13126 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13127 : #else
13128 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13129 : return;
13130 : #endif
13131 : }
13132 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13133 0 : p_struct = NULL;
13134 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforest copy constructor failure (source is not initialized)", &_state);
13135 0 : p_struct = (alglib_impl::decisionforest*)alglib_impl::ae_malloc(sizeof(alglib_impl::decisionforest), &_state);
13136 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
13137 0 : alglib_impl::_decisionforest_init_copy(p_struct, const_cast<alglib_impl::decisionforest*>(rhs.p_struct), &_state, ae_false);
13138 0 : ae_state_clear(&_state);
13139 0 : }
13140 :
13141 0 : _decisionforest_owner& _decisionforest_owner::operator=(const _decisionforest_owner &rhs)
13142 : {
13143 0 : if( this==&rhs )
13144 0 : return *this;
13145 : jmp_buf _break_jump;
13146 : alglib_impl::ae_state _state;
13147 :
13148 0 : alglib_impl::ae_state_init(&_state);
13149 0 : if( setjmp(_break_jump) )
13150 : {
13151 : #if !defined(AE_NO_EXCEPTIONS)
13152 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13153 : #else
13154 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13155 : return *this;
13156 : #endif
13157 : }
13158 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13159 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: decisionforest assignment constructor failure (destination is not initialized)", &_state);
13160 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: decisionforest assignment constructor failure (source is not initialized)", &_state);
13161 0 : alglib_impl::_decisionforest_destroy(p_struct);
13162 0 : memset(p_struct, 0, sizeof(alglib_impl::decisionforest));
13163 0 : alglib_impl::_decisionforest_init_copy(p_struct, const_cast<alglib_impl::decisionforest*>(rhs.p_struct), &_state, ae_false);
13164 0 : ae_state_clear(&_state);
13165 0 : return *this;
13166 : }
13167 :
13168 0 : _decisionforest_owner::~_decisionforest_owner()
13169 : {
13170 0 : if( p_struct!=NULL )
13171 : {
13172 0 : alglib_impl::_decisionforest_destroy(p_struct);
13173 0 : ae_free(p_struct);
13174 : }
13175 0 : }
13176 :
13177 0 : alglib_impl::decisionforest* _decisionforest_owner::c_ptr()
13178 : {
13179 0 : return p_struct;
13180 : }
13181 :
13182 0 : alglib_impl::decisionforest* _decisionforest_owner::c_ptr() const
13183 : {
13184 0 : return const_cast<alglib_impl::decisionforest*>(p_struct);
13185 : }
13186 0 : decisionforest::decisionforest() : _decisionforest_owner()
13187 : {
13188 0 : }
13189 :
13190 0 : decisionforest::decisionforest(const decisionforest &rhs):_decisionforest_owner(rhs)
13191 : {
13192 0 : }
13193 :
13194 0 : decisionforest& decisionforest::operator=(const decisionforest &rhs)
13195 : {
13196 0 : if( this==&rhs )
13197 0 : return *this;
13198 0 : _decisionforest_owner::operator=(rhs);
13199 0 : return *this;
13200 : }
13201 :
13202 0 : decisionforest::~decisionforest()
13203 : {
13204 0 : }
13205 :
13206 :
13207 : /*************************************************************************
13208 : Decision forest training report.
13209 :
13210 : === training/oob errors ==================================================
13211 :
13212 : The following fields store training set errors:
13213 : * relclserror - fraction of misclassified cases, [0,1]
13214 : * avgce - average cross-entropy in bits per symbol
13215 : * rmserror - root-mean-square error
13216 : * avgerror - average error
13217 : * avgrelerror - average relative error
13218 :
13219 : Out-of-bag estimates are stored in fields with the same names but with an "oob" prefix.
13220 :
13221 : For classification problems:
13222 : * RMS, AVG and AVGREL errors are calculated for posterior probabilities
13223 :
13224 : For regression problems:
13225 : * RELCLS and AVGCE errors are zero
13226 :
13227 : === variable importance ==================================================
13228 :
13229 : The following fields are used to store variable importance information:
13230 :
13231 : * topvars - variables ordered from the most important to
13232 : the least important ones (according to the
13233 : current choice of importance rating).
13234 : For example, topvars[0] contains the index of
13235 : the most important variable, and topvars[0:2]
13236 : are the indexes of the 3 most important ones, and so on.
13237 :
13238 : * varimportances - array[nvars], ratings (the larger, the more
13239 : important the variable is, always in [0,1]
13240 : range).
13241 : By default, filled with zeros (no importance
13242 : ratings are provided unless you explicitly
13243 : request them).
13244 : A zero rating means that the variable is not
13245 : important; however, you will rarely encounter
13246 : such a thing - in many cases unimportant
13247 : variables produce nearly-zero (but nonzero) ratings.
13248 :
13249 : Variable importance report must be EXPLICITLY requested by calling:
13250 : * dfbuildersetimportancegini() function, if you need out-of-bag Gini-based
13251 : importance rating also known as MDI (fast to calculate, resistant to
13252 : overfitting issues, but has some bias towards continuous and
13253 : high-cardinality categorical variables)
13254 : * dfbuildersetimportancetrngini() function, if you need training set
13255 : Gini-based importance rating (what other packages typically report).
13256 : * dfbuildersetimportancepermutation() function, if you need permutation-
13257 : based importance rating also known as MDA (slower to calculate, but less
13258 : biased)
13259 : * dfbuildersetimportancenone() function, if you do not need importance
13260 : ratings - ratings will be zero, topvars[] will be [0,1,2,...]
13261 :
13262 : Different importance ratings (Gini or permutation) produce non-comparable
13263 : values. Although in all cases rating values lie in the [0,1] range, there
13264 : exist differences:
13265 : * informally speaking, Gini importance rating tends to divide "unit amount
13266 : of importance" between several important variables, i.e. it produces
13267 : estimates which roughly sum to 1.0 (or less than 1.0, if your task cannot
13268 : be solved exactly). If all variables are equally important, they
13269 : will have the same rating, roughly 1/NVars, even if every variable is
13270 : critically important.
13271 : * on the other hand, permutation importance tells us what percentage of
13272 : the model predictive power will be ruined by permuting this specific
13273 : variable. It does not produce estimates which sum to one. A critically
13274 : important variable will have a rating close to 1.0, and you may have
13275 : multiple variables with such a rating.
13276 :
13277 : More information on variable importance ratings can be found in comments
13278 : on the dfbuildersetimportancegini() and dfbuildersetimportancepermutation()
13279 : functions.
13280 : *************************************************************************/
13281 0 : _dfreport_owner::_dfreport_owner()
13282 : {
13283 : jmp_buf _break_jump;
13284 : alglib_impl::ae_state _state;
13285 :
13286 0 : alglib_impl::ae_state_init(&_state);
13287 0 : if( setjmp(_break_jump) )
13288 : {
13289 0 : if( p_struct!=NULL )
13290 : {
13291 0 : alglib_impl::_dfreport_destroy(p_struct);
13292 0 : alglib_impl::ae_free(p_struct);
13293 : }
13294 0 : p_struct = NULL;
13295 : #if !defined(AE_NO_EXCEPTIONS)
13296 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13297 : #else
13298 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13299 : return;
13300 : #endif
13301 : }
13302 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13303 0 : p_struct = NULL;
13304 0 : p_struct = (alglib_impl::dfreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::dfreport), &_state);
13305 0 : memset(p_struct, 0, sizeof(alglib_impl::dfreport));
13306 0 : alglib_impl::_dfreport_init(p_struct, &_state, ae_false);
13307 0 : ae_state_clear(&_state);
13308 0 : }
13309 :
13310 0 : _dfreport_owner::_dfreport_owner(const _dfreport_owner &rhs)
13311 : {
13312 : jmp_buf _break_jump;
13313 : alglib_impl::ae_state _state;
13314 :
13315 0 : alglib_impl::ae_state_init(&_state);
13316 0 : if( setjmp(_break_jump) )
13317 : {
13318 0 : if( p_struct!=NULL )
13319 : {
13320 0 : alglib_impl::_dfreport_destroy(p_struct);
13321 0 : alglib_impl::ae_free(p_struct);
13322 : }
13323 0 : p_struct = NULL;
13324 : #if !defined(AE_NO_EXCEPTIONS)
13325 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13326 : #else
13327 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13328 : return;
13329 : #endif
13330 : }
13331 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13332 0 : p_struct = NULL;
13333 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: dfreport copy constructor failure (source is not initialized)", &_state);
13334 0 : p_struct = (alglib_impl::dfreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::dfreport), &_state);
13335 0 : memset(p_struct, 0, sizeof(alglib_impl::dfreport));
13336 0 : alglib_impl::_dfreport_init_copy(p_struct, const_cast<alglib_impl::dfreport*>(rhs.p_struct), &_state, ae_false);
13337 0 : ae_state_clear(&_state);
13338 0 : }
13339 :
13340 0 : _dfreport_owner& _dfreport_owner::operator=(const _dfreport_owner &rhs)
13341 : {
13342 0 : if( this==&rhs )
13343 0 : return *this;
13344 : jmp_buf _break_jump;
13345 : alglib_impl::ae_state _state;
13346 :
13347 0 : alglib_impl::ae_state_init(&_state);
13348 0 : if( setjmp(_break_jump) )
13349 : {
13350 : #if !defined(AE_NO_EXCEPTIONS)
13351 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
13352 : #else
13353 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
13354 : return *this;
13355 : #endif
13356 : }
13357 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
13358 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: dfreport assignment constructor failure (destination is not initialized)", &_state);
13359 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: dfreport assignment constructor failure (source is not initialized)", &_state);
13360 0 : alglib_impl::_dfreport_destroy(p_struct);
13361 0 : memset(p_struct, 0, sizeof(alglib_impl::dfreport));
13362 0 : alglib_impl::_dfreport_init_copy(p_struct, const_cast<alglib_impl::dfreport*>(rhs.p_struct), &_state, ae_false);
13363 0 : ae_state_clear(&_state);
13364 0 : return *this;
13365 : }
13366 :
13367 0 : _dfreport_owner::~_dfreport_owner()
13368 : {
13369 0 : if( p_struct!=NULL )
13370 : {
13371 0 : alglib_impl::_dfreport_destroy(p_struct);
13372 0 : ae_free(p_struct);
13373 : }
13374 0 : }
13375 :
13376 0 : alglib_impl::dfreport* _dfreport_owner::c_ptr()
13377 : {
13378 0 : return p_struct;
13379 : }
13380 :
13381 0 : alglib_impl::dfreport* _dfreport_owner::c_ptr() const
13382 : {
13383 0 : return const_cast<alglib_impl::dfreport*>(p_struct);
13384 : }
13385 0 : dfreport::dfreport() : _dfreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),oobrelclserror(p_struct->oobrelclserror),oobavgce(p_struct->oobavgce),oobrmserror(p_struct->oobrmserror),oobavgerror(p_struct->oobavgerror),oobavgrelerror(p_struct->oobavgrelerror),topvars(&p_struct->topvars),varimportances(&p_struct->varimportances)
13386 : {
13387 0 : }
13388 :
13389 0 : dfreport::dfreport(const dfreport &rhs):_dfreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror),oobrelclserror(p_struct->oobrelclserror),oobavgce(p_struct->oobavgce),oobrmserror(p_struct->oobrmserror),oobavgerror(p_struct->oobavgerror),oobavgrelerror(p_struct->oobavgrelerror),topvars(&p_struct->topvars),varimportances(&p_struct->varimportances)
13390 : {
13391 0 : }
13392 :
13393 0 : dfreport& dfreport::operator=(const dfreport &rhs)
13394 : {
13395 0 : if( this==&rhs )
13396 0 : return *this;
13397 0 : _dfreport_owner::operator=(rhs);
13398 0 : return *this;
13399 : }
13400 :
13401 0 : dfreport::~dfreport()
13402 : {
13403 0 : }
13404 :
13405 :
13406 : /*************************************************************************
13407 : This function serializes data structure to string.
13408 :
13409 : Important properties of s_out:
13410 : * it contains alphanumeric characters, dots, underscores, minus signs
13411 : * these symbols are grouped into words, which are separated by spaces
13412 : and Windows-style (CR+LF) newlines
13413 : * although the serializer uses spaces and CR+LF as separators, you can
13414 : replace any separator character by an arbitrary combination of spaces,
13415 : tabs, Windows or Unix newlines. This allows flexible reformatting of
13416 : the string in case you want to include it in a text or XML file.
13417 : But you should not insert separators into the middle of the "words",
13418 : nor should you change the case of letters.
13419 : * s_out can be freely moved between 32-bit and 64-bit systems, little
13420 : and big endian machines, and so on. You can serialize structure on
13421 : 32-bit machine and unserialize it on 64-bit one (or vice versa), or
13422 : serialize it on SPARC and unserialize on x86. You can also
13423 : serialize it in C++ version of ALGLIB and unserialize in C# one,
13424 : and vice versa.
13425 : *************************************************************************/
13426 0 : void dfserialize(decisionforest &obj, std::string &s_out)
13427 : {
13428 : jmp_buf _break_jump;
13429 : alglib_impl::ae_state state;
13430 : alglib_impl::ae_serializer serializer;
13431 : alglib_impl::ae_int_t ssize;
13432 :
13433 0 : alglib_impl::ae_state_init(&state);
13434 0 : if( setjmp(_break_jump) )
13435 : {
13436 : #if !defined(AE_NO_EXCEPTIONS)
13437 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
13438 : #else
13439 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
13440 : return;
13441 : #endif
13442 : }
13443 0 : ae_state_set_break_jump(&state, &_break_jump);
13444 0 : alglib_impl::ae_serializer_init(&serializer);
13445 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
13446 0 : alglib_impl::dfalloc(&serializer, obj.c_ptr(), &state);
13447 0 : ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
13448 0 : s_out.clear();
13449 0 : s_out.reserve((size_t)(ssize+1));
13450 0 : alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
13451 0 : alglib_impl::dfserialize(&serializer, obj.c_ptr(), &state);
13452 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
13453 0 : alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
13454 0 : alglib_impl::ae_serializer_clear(&serializer);
13455 0 : alglib_impl::ae_state_clear(&state);
13456 0 : }
13457 : /*************************************************************************
13458 : This function unserializes data structure from string.
13459 : *************************************************************************/
13460 0 : void dfunserialize(const std::string &s_in, decisionforest &obj)
13461 : {
13462 : jmp_buf _break_jump;
13463 : alglib_impl::ae_state state;
13464 : alglib_impl::ae_serializer serializer;
13465 :
13466 0 : alglib_impl::ae_state_init(&state);
13467 0 : if( setjmp(_break_jump) )
13468 : {
13469 : #if !defined(AE_NO_EXCEPTIONS)
13470 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
13471 : #else
13472 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
13473 : return;
13474 : #endif
13475 : }
13476 0 : ae_state_set_break_jump(&state, &_break_jump);
13477 0 : alglib_impl::ae_serializer_init(&serializer);
13478 0 : alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
13479 0 : alglib_impl::dfunserialize(&serializer, obj.c_ptr(), &state);
13480 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
13481 0 : alglib_impl::ae_serializer_clear(&serializer);
13482 0 : alglib_impl::ae_state_clear(&state);
13483 0 : }
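/*************************************************************************
Editorial usage sketch (not part of the library). It shows how the string
versions of dfserialize()/dfunserialize() above can be combined into a
round trip; "df" is assumed to be a decision forest trained elsewhere.

    std::string s;
    alglib::decisionforest df2;
    alglib::dfserialize(df, s);     // model -> portable ASCII string
    alglib::dfunserialize(s, df2);  // string -> equivalent model df2
    // df2 can now be used for inference exactly like df
*************************************************************************/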
13484 :
13485 :
13486 : /*************************************************************************
13487 : This function serializes data structure to C++ stream.
13488 :
13489 : The data stream generated by this function is the same as the string
13490 : representation generated by the string version of the serializer -
13491 : alphanumeric characters, dots, underscores, minus signs, which are grouped
13492 : into words separated by spaces and CR+LF.
13493 :
13494 : We recommend you to read the comments on the string version of the
13495 : serializer to find out more about serialization of ALGLIB objects.
13496 : *************************************************************************/
13497 0 : void dfserialize(decisionforest &obj, std::ostream &s_out)
13498 : {
13499 : jmp_buf _break_jump;
13500 : alglib_impl::ae_state state;
13501 : alglib_impl::ae_serializer serializer;
13502 :
13503 0 : alglib_impl::ae_state_init(&state);
13504 0 : if( setjmp(_break_jump) )
13505 : {
13506 : #if !defined(AE_NO_EXCEPTIONS)
13507 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
13508 : #else
13509 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
13510 : return;
13511 : #endif
13512 : }
13513 0 : ae_state_set_break_jump(&state, &_break_jump);
13514 0 : alglib_impl::ae_serializer_init(&serializer);
13515 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
13516 0 : alglib_impl::dfalloc(&serializer, obj.c_ptr(), &state);
13517 0 : alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
13518 0 : alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
13519 0 : alglib_impl::dfserialize(&serializer, obj.c_ptr(), &state);
13520 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
13521 0 : alglib_impl::ae_serializer_clear(&serializer);
13522 0 : alglib_impl::ae_state_clear(&state);
13523 0 : }
13524 : /*************************************************************************
13525 : This function unserializes data structure from stream.
13526 : *************************************************************************/
13527 0 : void dfunserialize(const std::istream &s_in, decisionforest &obj)
13528 : {
13529 : jmp_buf _break_jump;
13530 : alglib_impl::ae_state state;
13531 : alglib_impl::ae_serializer serializer;
13532 :
13533 0 : alglib_impl::ae_state_init(&state);
13534 0 : if( setjmp(_break_jump) )
13535 : {
13536 : #if !defined(AE_NO_EXCEPTIONS)
13537 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
13538 : #else
13539 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
13540 : return;
13541 : #endif
13542 : }
13543 0 : ae_state_set_break_jump(&state, &_break_jump);
13544 0 : alglib_impl::ae_serializer_init(&serializer);
13545 0 : alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
13546 0 : alglib_impl::dfunserialize(&serializer, obj.c_ptr(), &state);
13547 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
13548 0 : alglib_impl::ae_serializer_clear(&serializer);
13549 0 : alglib_impl::ae_state_clear(&state);
13550 0 : }
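/*************************************************************************
Editorial usage sketch (not part of the library): persisting a model with
the stream versions of the serializers above. The file name "forest.mdl"
is illustrative only; requires <fstream>.

    std::ofstream fout("forest.mdl");
    alglib::dfserialize(df, fout);   // write model to file
    fout.close();

    std::ifstream fin("forest.mdl");
    alglib::decisionforest df2;
    alglib::dfunserialize(fin, df2); // read it back
*************************************************************************/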
13551 :
13552 : /*************************************************************************
13553 : This function creates buffer structure which can be used to perform
13554 : parallel inference requests.
13555 :
13556 : The DF subpackage provides two sets of computing functions - ones which use
13557 : the internal buffer of the DF model (these functions are single-threaded
13558 : because they use the same buffer, which cannot be shared between threads),
13559 : and ones which use an external buffer.
13560 :
13561 : This function is used to initialize external buffer.
13562 :
13563 : INPUT PARAMETERS
13564 : Model - DF model which is associated with newly created buffer
13565 :
13566 : OUTPUT PARAMETERS
13567 : Buf - external buffer.
13568 :
13569 :
13570 : IMPORTANT: a buffer object should be used only with the model which was
13571 : used to initialize it. Any attempt to use the buffer with a
13572 : different model is dangerous - you may get an integrity check
13573 : failure (exception) because the sizes of the internal arrays do
13574 : not fit the dimensions of the model structure.
13575 :
13576 : -- ALGLIB --
13577 : Copyright 15.02.2019 by Bochkanov Sergey
13578 : *************************************************************************/
13579 0 : void dfcreatebuffer(const decisionforest &model, decisionforestbuffer &buf, const xparams _xparams)
13580 : {
13581 : jmp_buf _break_jump;
13582 : alglib_impl::ae_state _alglib_env_state;
13583 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13584 0 : if( setjmp(_break_jump) )
13585 : {
13586 : #if !defined(AE_NO_EXCEPTIONS)
13587 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13588 : #else
13589 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13590 : return;
13591 : #endif
13592 : }
13593 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13594 0 : if( _xparams.flags!=0x0 )
13595 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13596 0 : alglib_impl::dfcreatebuffer(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::decisionforestbuffer*>(buf.c_ptr()), &_alglib_env_state);
13597 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13598 0 : return;
13599 : }
13600 :
13601 : /*************************************************************************
13602 : This subroutine creates DecisionForestBuilder object which is used to
13603 : train decision forests.
13604 :
13605 : By default, a new builder stores an empty dataset and some reasonable
13606 : default settings. At the very least, you should specify the dataset prior
13607 : to building the decision forest. You can also tweak settings of the forest
13608 : construction algorithm (recommended, although the defaults should work well).
13609 :
13610 : The following actions are mandatory:
13611 : * calling dfbuildersetdataset() to specify dataset
13612 : * calling dfbuilderbuildrandomforest() to build decision forest using
13613 : current dataset and default settings
13614 :
13615 : Additionally, you may call:
13616 : * dfbuildersetrndvars() or dfbuildersetrndvarsratio() to specify number of
13617 : variables randomly chosen for each split
13618 : * dfbuildersetsubsampleratio() to specify fraction of the dataset randomly
13619 : subsampled to build each tree
13620 : * dfbuildersetseed() to control random seed chosen for tree construction
13621 :
13622 : INPUT PARAMETERS:
13623 : none
13624 :
13625 : OUTPUT PARAMETERS:
13626 : S - decision forest builder
13627 :
13628 : -- ALGLIB --
13629 : Copyright 21.05.2018 by Bochkanov Sergey
13630 : *************************************************************************/
13631 0 : void dfbuildercreate(decisionforestbuilder &s, const xparams _xparams)
13632 : {
13633 : jmp_buf _break_jump;
13634 : alglib_impl::ae_state _alglib_env_state;
13635 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13636 0 : if( setjmp(_break_jump) )
13637 : {
13638 : #if !defined(AE_NO_EXCEPTIONS)
13639 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13640 : #else
13641 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13642 : return;
13643 : #endif
13644 : }
13645 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13646 0 : if( _xparams.flags!=0x0 )
13647 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13648 0 : alglib_impl::dfbuildercreate(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
13649 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13650 0 : return;
13651 : }
13652 :
13653 : /*************************************************************************
13654 : This subroutine adds dense dataset to the internal storage of the builder
13655 : object. Specifying your dataset in the dense format means that the dense
13656 : version of the forest construction algorithm will be invoked.
13657 :
13658 : INPUT PARAMETERS:
13659 : S - decision forest builder object
13660 : XY - array[NPoints,NVars+1] (minimum size; actual size can
13661 : be larger, only leading part is used anyway), dataset:
13662 : * first NVars elements of each row store values of the
13663 : independent variables
13664 : * the last column stores the class number (in 0...NClasses-1)
13665 : or the real value of the dependent variable
13666 : NPoints - number of rows in the dataset, NPoints>=1
13667 : NVars - number of independent variables, NVars>=1
13668 : NClasses - indicates type of the problem being solved:
13669 : * NClasses>=2 means that classification problem is
13670 : solved (last column of the dataset stores class
13671 : number)
13672 : * NClasses=1 means that regression problem is solved
13673 : (last column of the dataset stores variable value)
13674 :
13675 : OUTPUT PARAMETERS:
13676 : S - decision forest builder
13677 :
13678 : -- ALGLIB --
13679 : Copyright 21.05.2018 by Bochkanov Sergey
13680 : *************************************************************************/
13681 0 : void dfbuildersetdataset(const decisionforestbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams)
13682 : {
13683 : jmp_buf _break_jump;
13684 : alglib_impl::ae_state _alglib_env_state;
13685 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13686 0 : if( setjmp(_break_jump) )
13687 : {
13688 : #if !defined(AE_NO_EXCEPTIONS)
13689 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13690 : #else
13691 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13692 : return;
13693 : #endif
13694 : }
13695 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13696 0 : if( _xparams.flags!=0x0 )
13697 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13698 0 : alglib_impl::dfbuildersetdataset(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &_alglib_env_state);
13699 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13700 0 : return;
13701 : }
13702 :
13703 : /*************************************************************************
13704 : This function sets number of variables (in [1,NVars] range) used by
13705 : decision forest construction algorithm.
13706 :
13707 : The default option is to use roughly sqrt(NVars) variables.
13708 :
13709 : INPUT PARAMETERS:
13710 : S - decision forest builder object
13711 : RndVars - number of randomly selected variables; values outside
13712 : of [1,NVars] range are silently clipped.
13713 :
13714 : OUTPUT PARAMETERS:
13715 : S - decision forest builder
13716 :
13717 : -- ALGLIB --
13718 : Copyright 21.05.2018 by Bochkanov Sergey
13719 : *************************************************************************/
13720 0 : void dfbuildersetrndvars(const decisionforestbuilder &s, const ae_int_t rndvars, const xparams _xparams)
13721 : {
13722 : jmp_buf _break_jump;
13723 : alglib_impl::ae_state _alglib_env_state;
13724 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13725 0 : if( setjmp(_break_jump) )
13726 : {
13727 : #if !defined(AE_NO_EXCEPTIONS)
13728 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13729 : #else
13730 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13731 : return;
13732 : #endif
13733 : }
13734 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13735 0 : if( _xparams.flags!=0x0 )
13736 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13737 0 : alglib_impl::dfbuildersetrndvars(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), rndvars, &_alglib_env_state);
13738 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13739 0 : return;
13740 : }
13741 :
13742 : /*************************************************************************
13743 : This function sets the number of variables used by the decision forest
13744 : construction algorithm as a fraction of the total variable count, in the (0,1) range.
13745 :
13746 : The default option is to use roughly sqrt(NVars) variables.
13747 :
13748 : INPUT PARAMETERS:
13749 : S - decision forest builder object
13750 : F - round(NVars*F) variables are selected
13751 :
13752 : OUTPUT PARAMETERS:
13753 : S - decision forest builder
13754 :
13755 : -- ALGLIB --
13756 : Copyright 21.05.2018 by Bochkanov Sergey
13757 : *************************************************************************/
13758 0 : void dfbuildersetrndvarsratio(const decisionforestbuilder &s, const double f, const xparams _xparams)
13759 : {
13760 : jmp_buf _break_jump;
13761 : alglib_impl::ae_state _alglib_env_state;
13762 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13763 0 : if( setjmp(_break_jump) )
13764 : {
13765 : #if !defined(AE_NO_EXCEPTIONS)
13766 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13767 : #else
13768 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13769 : return;
13770 : #endif
13771 : }
13772 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13773 0 : if( _xparams.flags!=0x0 )
13774 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13775 0 : alglib_impl::dfbuildersetrndvarsratio(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), f, &_alglib_env_state);
13776 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13777 0 : return;
13778 : }
13779 :
13780 : /*************************************************************************
13781 : This function tells decision forest builder to automatically choose number
13782 : of variables used by decision forest construction algorithm. Roughly
13783 : sqrt(NVars) variables will be used.
13784 :
13785 : INPUT PARAMETERS:
13786 : S - decision forest builder object
13787 :
13788 : OUTPUT PARAMETERS:
13789 : S - decision forest builder
13790 :
13791 : -- ALGLIB --
13792 : Copyright 21.05.2018 by Bochkanov Sergey
13793 : *************************************************************************/
13794 0 : void dfbuildersetrndvarsauto(const decisionforestbuilder &s, const xparams _xparams)
13795 : {
13796 : jmp_buf _break_jump;
13797 : alglib_impl::ae_state _alglib_env_state;
13798 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13799 0 : if( setjmp(_break_jump) )
13800 : {
13801 : #if !defined(AE_NO_EXCEPTIONS)
13802 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13803 : #else
13804 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13805 : return;
13806 : #endif
13807 : }
13808 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13809 0 : if( _xparams.flags!=0x0 )
13810 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13811 0 : alglib_impl::dfbuildersetrndvarsauto(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
13812 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13813 0 : return;
13814 : }
13815 :
13816 : /*************************************************************************
13817 : This function sets the size of the dataset subsample generated by the
13818 : decision forest construction algorithm. The size is specified as a fraction
13819 : of the total dataset size.
13820 :
13821 : The default option is to use 50% of the dataset for training, 50% for the
13822 : OOB estimates. You can decrease fraction F down to 10%, 1% or even below
13823 : in order to reduce overfitting.
13824 :
13825 : INPUT PARAMETERS:
13826 : S - decision forest builder object
13827 : F - fraction of the dataset to use, in (0,1] range. Values
13828 : outside of this range will be silently clipped. At
13829 : least one element is always selected for the training
13830 : set.
13831 :
13832 : OUTPUT PARAMETERS:
13833 : S - decision forest builder
13834 :
13835 : -- ALGLIB --
13836 : Copyright 21.05.2018 by Bochkanov Sergey
13837 : *************************************************************************/
13838 0 : void dfbuildersetsubsampleratio(const decisionforestbuilder &s, const double f, const xparams _xparams)
13839 : {
13840 : jmp_buf _break_jump;
13841 : alglib_impl::ae_state _alglib_env_state;
13842 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13843 0 : if( setjmp(_break_jump) )
13844 : {
13845 : #if !defined(AE_NO_EXCEPTIONS)
13846 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13847 : #else
13848 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13849 : return;
13850 : #endif
13851 : }
13852 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13853 0 : if( _xparams.flags!=0x0 )
13854 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13855 0 : alglib_impl::dfbuildersetsubsampleratio(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), f, &_alglib_env_state);
13856 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13857 0 : return;
13858 : }
13859 :
13860 : /*************************************************************************
13861 : This function sets seed used by internal RNG for random subsampling and
13862 : random selection of variable subsets.
13863 :
13864 : By default a random seed is used, i.e. every time you build a decision
13865 : forest, the generator is seeded with a new value obtained from the
13866 : system-wide RNG. Thus, the decision forest builder returns non-deterministic
13867 : results. You can change this behavior by specifying a fixed positive seed value.
13868 :
13869 : INPUT PARAMETERS:
13870 : S - decision forest builder object
13871 : SeedVal - seed value:
13872 : * positive values are used for seeding RNG with fixed
13873 : seed, i.e. subsequent runs on same data will return
13874 : same decision forests
13875 : * non-positive seed means that random seed is used
13876 : for every run of builder, i.e. subsequent runs on
13877 : same datasets will return slightly different
13878 : decision forests
13879 :
13880 : OUTPUT PARAMETERS:
13881 : S - decision forest builder
13882 :
13883 : -- ALGLIB --
13884 : Copyright 21.05.2018 by Bochkanov Sergey
13885 : *************************************************************************/
13886 0 : void dfbuildersetseed(const decisionforestbuilder &s, const ae_int_t seedval, const xparams _xparams)
13887 : {
13888 : jmp_buf _break_jump;
13889 : alglib_impl::ae_state _alglib_env_state;
13890 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13891 0 : if( setjmp(_break_jump) )
13892 : {
13893 : #if !defined(AE_NO_EXCEPTIONS)
13894 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13895 : #else
13896 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13897 : return;
13898 : #endif
13899 : }
13900 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13901 0 : if( _xparams.flags!=0x0 )
13902 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13903 0 : alglib_impl::dfbuildersetseed(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), seedval, &_alglib_env_state);
13904 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13905 0 : return;
13906 : }
13907 :
13908 : /*************************************************************************
13909 : This function sets random decision forest construction algorithm.
13910 :
13911 : For now, only one decision forest construction algorithm is supported -
13912 : a dense "baseline" RDF algorithm.
13913 :
13914 : INPUT PARAMETERS:
13915 : S - decision forest builder object
13916 : AlgoType - algorithm type:
13917 : * 0 = baseline dense RDF
13918 :
13919 : OUTPUT PARAMETERS:
13920 : S - decision forest builder
13921 :
13922 : -- ALGLIB --
13923 : Copyright 21.05.2018 by Bochkanov Sergey
13924 : *************************************************************************/
13925 0 : void dfbuildersetrdfalgo(const decisionforestbuilder &s, const ae_int_t algotype, const xparams _xparams)
13926 : {
13927 : jmp_buf _break_jump;
13928 : alglib_impl::ae_state _alglib_env_state;
13929 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13930 0 : if( setjmp(_break_jump) )
13931 : {
13932 : #if !defined(AE_NO_EXCEPTIONS)
13933 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13934 : #else
13935 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13936 : return;
13937 : #endif
13938 : }
13939 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13940 0 : if( _xparams.flags!=0x0 )
13941 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13942 0 : alglib_impl::dfbuildersetrdfalgo(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), algotype, &_alglib_env_state);
13943 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13944 0 : return;
13945 : }
13946 :
13947 : /*************************************************************************
13948 : This function sets the split selection algorithm used by the decision
13949 : forest classifier. You may choose between several algorithms, which differ
13950 : in speed and in the quality of the results.
13951 :
13952 : INPUT PARAMETERS:
13953 : S - decision forest builder object
13954 : SplitStrength- split type:
13955 : * 0 = split at the random position, fastest one
13956 : * 1 = split at the middle of the range
13957 : * 2 = strong split at the best point of the range (default)
13958 :
13959 : OUTPUT PARAMETERS:
13960 : S - decision forest builder
13961 :
13962 : -- ALGLIB --
13963 : Copyright 21.05.2018 by Bochkanov Sergey
13964 : *************************************************************************/
13965 0 : void dfbuildersetrdfsplitstrength(const decisionforestbuilder &s, const ae_int_t splitstrength, const xparams _xparams)
13966 : {
13967 : jmp_buf _break_jump;
13968 : alglib_impl::ae_state _alglib_env_state;
13969 0 : alglib_impl::ae_state_init(&_alglib_env_state);
13970 0 : if( setjmp(_break_jump) )
13971 : {
13972 : #if !defined(AE_NO_EXCEPTIONS)
13973 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
13974 : #else
13975 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
13976 : return;
13977 : #endif
13978 : }
13979 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
13980 0 : if( _xparams.flags!=0x0 )
13981 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
13982 0 : alglib_impl::dfbuildersetrdfsplitstrength(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), splitstrength, &_alglib_env_state);
13983 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
13984 0 : return;
13985 : }
13986 :
13987 : /*************************************************************************
13988 : This function tells decision forest construction algorithm to use
13989 : Gini impurity based variable importance estimation (also known as MDI).
13990 :
13991 : This version of importance estimation algorithm analyzes mean decrease in
13992 : impurity (MDI) on training sample during splits. The result is divided
13993 : by impurity at the root node in order to produce estimate in [0,1] range.
13994 :
13995 : Such estimates are fast to calculate and beautifully normalized (sum to
13996 : one) but have the following downsides:
13997 : * they ALWAYS sum to 1.0, even if the output is completely unpredictable,
13998 : i.e. MDI allows us to order variables by importance, but does not tell us
13999 : about the "absolute" importances of variables
14000 : * there exists some bias towards continuous and high-cardinality categorical
14001 : variables
14002 :
14003 : NOTE: informally speaking, MDA (permutation importance) rating answers the
14004 : question "what part of the model predictive power is ruined by
14005 : permuting k-th variable?" while MDI tells us "what part of the model
14006 : predictive power was achieved due to usage of k-th variable".
14007 :
14008 : Thus, MDA rates each variable independently at "0 to 1" scale while
14009 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
14010 : between several important variables.
14011 :
14012 : If all variables are equally important, they will have the same
14013 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
14014 : However, roughly the same picture will be produced for the "all
14015 : variables provide information, no one is critical" situation and for
14016 : the "all variables are critical, drop any one, everything is ruined"
14017 : situation.
14018 :
14019 : Contrary to that, MDA will rate a critical variable at ~1.0, and an
14020 : important but non-critical variable will have a less-than-unit
14021 : rating.
14022 :
14023 : NOTE: quite often MDA and MDI return the same results. This generally
14024 : happens on problems with low test set error (a few percent at most)
14025 : and a training set large enough to avoid overfitting.
14026 :
14027 : The difference between MDA, MDI and OOB-MDI becomes important only
14028 : on "hard" tasks with high test set error and/or small training set.
14029 :
14030 : INPUT PARAMETERS:
14031 : S - decision forest builder object
14032 :
14033 : OUTPUT PARAMETERS:
14034 : S - decision forest builder object. Next call to the forest
14035 : construction function will produce:
14036 : * importance estimates in rep.varimportances field
14037 : * variable ranks in rep.topvars field
14038 :
14039 : -- ALGLIB --
14040 : Copyright 29.07.2019 by Bochkanov Sergey
14041 : *************************************************************************/
14042 0 : void dfbuildersetimportancetrngini(const decisionforestbuilder &s, const xparams _xparams)
14043 : {
14044 : jmp_buf _break_jump;
14045 : alglib_impl::ae_state _alglib_env_state;
14046 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14047 0 : if( setjmp(_break_jump) )
14048 : {
14049 : #if !defined(AE_NO_EXCEPTIONS)
14050 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14051 : #else
14052 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14053 : return;
14054 : #endif
14055 : }
14056 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14057 0 : if( _xparams.flags!=0x0 )
14058 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14059 0 : alglib_impl::dfbuildersetimportancetrngini(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14060 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14061 0 : return;
14062 : }
14063 :
14064 : /*************************************************************************
14065 : This function tells decision forest construction algorithm to use
14066 : out-of-bag version of Gini variable importance estimation (also known as
14067 : OOB-MDI).
14068 :
14069 : This version of importance estimation algorithm analyzes mean decrease in
14070 : impurity (MDI) on out-of-bag sample during splits. The result is divided
14071 : by impurity at the root node in order to produce estimate in [0,1] range.
14072 :
14073 : Such estimates are fast to calculate and resistant to overfitting issues
14074 : (thanks to the out-of-bag estimates used). However, the OOB Gini rating has
14075 : the following downsides:
14076 : * there exists some bias towards continuous and high-cardinality categorical
14077 : variables
14078 : * Gini rating allows us to order variables by importance, but it is hard
14079 : to define the importance of a variable by itself.
14080 :
14081 : NOTE: informally speaking, MDA (permutation importance) rating answers the
14082 : question "what part of the model predictive power is ruined by
14083 : permuting k-th variable?" while MDI tells us "what part of the model
14084 : predictive power was achieved due to usage of k-th variable".
14085 :
14086 : Thus, MDA rates each variable independently at "0 to 1" scale while
14087 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
14088 : between several important variables.
14089 :
14090 : If all variables are equally important, they will have the same
14091 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
14092 : However, roughly the same picture will be produced for the "all
14093 : variables provide information, no one is critical" situation and for
14094 : the "all variables are critical, drop any one, everything is ruined"
14095 : situation.
14096 :
14097 : Contrary to that, MDA will rate a critical variable at ~1.0, and an
14098 : important but non-critical variable will have a less-than-unit
14099 : rating.
14100 :
14101 : NOTE: quite often MDA and MDI return the same results. This generally
14102 : happens on problems with low test set error (a few percent at most)
14103 : and a training set large enough to avoid overfitting.
14104 :
14105 : The difference between MDA, MDI and OOB-MDI becomes important only
14106 : on "hard" tasks with high test set error and/or small training set.
14107 :
14108 : INPUT PARAMETERS:
14109 : S - decision forest builder object
14110 :
14111 : OUTPUT PARAMETERS:
14112 : S - decision forest builder object. Next call to the forest
14113 : construction function will produce:
14114 : * importance estimates in rep.varimportances field
14115 : * variable ranks in rep.topvars field
14116 :
14117 : -- ALGLIB --
14118 : Copyright 29.07.2019 by Bochkanov Sergey
14119 : *************************************************************************/
14120 0 : void dfbuildersetimportanceoobgini(const decisionforestbuilder &s, const xparams _xparams)
14121 : {
14122 : jmp_buf _break_jump;
14123 : alglib_impl::ae_state _alglib_env_state;
14124 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14125 0 : if( setjmp(_break_jump) )
14126 : {
14127 : #if !defined(AE_NO_EXCEPTIONS)
14128 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14129 : #else
14130 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14131 : return;
14132 : #endif
14133 : }
14134 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14135 0 : if( _xparams.flags!=0x0 )
14136 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14137 0 : alglib_impl::dfbuildersetimportanceoobgini(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14138 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14139 0 : return;
14140 : }
14141 :
14142 : /*************************************************************************
14143 : This function tells decision forest construction algorithm to use
14144 : permutation variable importance estimator (also known as MDA).
14145 :
14146 : This version of the importance estimation algorithm analyzes the mean
14147 : increase in the out-of-bag sum of squared residuals after random permutation
14148 : of the J-th variable. The result is divided by the error computed with all
14149 : variables being perturbed, to produce an R-squared-like estimate in [0,1].
14150 :
14151 : Such an estimate is slower to calculate than the Gini-based rating because
14152 : it needs multiple inference runs for each of the variables being studied.
14153 :
14154 : ALGLIB uses a parallelized and highly optimized algorithm which analyzes
14155 : the path through the decision tree and handles most perturbations
14156 : in O(1) time; nevertheless, requesting MDA importances may increase forest
14157 : construction time by 10% to 200% (or more, if you have thousands of
14158 : variables).
14159 :
14160 : However, the MDA rating has the following benefits over Gini-based ones:
14161 : * no bias towards specific variable types
14162 : * ability to directly evaluate "absolute" importance of some variable at
14163 : "0 to 1" scale (contrary to Gini-based rating, which returns comparative
14164 : importances).
14165 :
14166 : NOTE: informally speaking, MDA (permutation importance) rating answers the
14167 : question "what part of the model predictive power is ruined by
14168 : permuting k-th variable?" while MDI tells us "what part of the model
14169 : predictive power was achieved due to usage of k-th variable".
14170 :
14171 : Thus, MDA rates each variable independently at "0 to 1" scale while
14172 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
14173 : between several important variables.
14174 :
14175 : If all variables are equally important, they will have the same
14176 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
14177 : However, roughly the same picture will be produced for the "all
14178 : variables provide information, no one is critical" situation and for
14179 : the "all variables are critical, drop any one, everything is ruined"
14180 : situation.
14181 :
14182 : Contrary to that, MDA will rate a critical variable at ~1.0, and an
14183 : important but non-critical variable will have a less-than-unit
14184 : rating.
14185 :
14186 : NOTE: quite often MDA and MDI return the same results. This generally
14187 : happens on problems with low test set error (a few percent at most)
14188 : and a training set large enough to avoid overfitting.
14189 :
14190 : The difference between MDA, MDI and OOB-MDI becomes important only
14191 : on "hard" tasks with high test set error and/or small training set.
14192 :
14193 : INPUT PARAMETERS:
14194 : S - decision forest builder object
14195 :
14196 : OUTPUT PARAMETERS:
14197 : S - decision forest builder object. Next call to the forest
14198 : construction function will produce:
14199 : * importance estimates in rep.varimportances field
14200 : * variable ranks in rep.topvars field
14201 :
14202 : -- ALGLIB --
14203 : Copyright 29.07.2019 by Bochkanov Sergey
14204 : *************************************************************************/
14205 0 : void dfbuildersetimportancepermutation(const decisionforestbuilder &s, const xparams _xparams)
14206 : {
14207 : jmp_buf _break_jump;
14208 : alglib_impl::ae_state _alglib_env_state;
14209 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14210 0 : if( setjmp(_break_jump) )
14211 : {
14212 : #if !defined(AE_NO_EXCEPTIONS)
14213 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14214 : #else
14215 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14216 : return;
14217 : #endif
14218 : }
14219 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14220 0 : if( _xparams.flags!=0x0 )
14221 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14222 0 : alglib_impl::dfbuildersetimportancepermutation(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14223 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14224 0 : return;
14225 : }
14226 :
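/*************************************************************************
EDITOR'S EXAMPLE (sketch, not part of the ALGLIB distribution): requesting
permutation (MDA) importances before training. The tiny hard-coded dataset
is an assumption; every function used is part of this C++ interface.
*************************************************************************/
static void example_mda_importance()
{
    alglib::decisionforestbuilder builder;
    alglib::decisionforest df;
    alglib::dfreport rep;
    alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]"; // 2 inputs + class label
    alglib::dfbuildercreate(builder);
    alglib::dfbuildersetdataset(builder, xy, 4, 2, 2);  // 4 points, 2 vars, 2 classes
    alglib::dfbuildersetimportancepermutation(builder); // request MDA ratings
    alglib::dfbuilderbuildrandomforest(builder, 50, df, rep);
    double imp0 = rep.varimportances[0];                // MDA rating of variable #0, in [0,1]
    (void)imp0;
}
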
14227 : /*************************************************************************
14228 : This function tells the decision forest construction algorithm to skip
14229 : variable importance estimation.
14230 :
14231 : INPUT PARAMETERS:
14232 : S - decision forest builder object
14233 :
14234 : OUTPUT PARAMETERS:
14235 : S - decision forest builder object. Next call to the forest
14236 : construction function will result in forest being built
14237 : without variable importance estimation.
14238 :
14239 : -- ALGLIB --
14240 : Copyright 29.07.2019 by Bochkanov Sergey
14241 : *************************************************************************/
14242 0 : void dfbuildersetimportancenone(const decisionforestbuilder &s, const xparams _xparams)
14243 : {
14244 : jmp_buf _break_jump;
14245 : alglib_impl::ae_state _alglib_env_state;
14246 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14247 0 : if( setjmp(_break_jump) )
14248 : {
14249 : #if !defined(AE_NO_EXCEPTIONS)
14250 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14251 : #else
14252 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14253 : return;
14254 : #endif
14255 : }
14256 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14257 0 : if( _xparams.flags!=0x0 )
14258 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14259 0 : alglib_impl::dfbuildersetimportancenone(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14260 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14261 0 : return;
14262 : }
14263 :
14264 : /*************************************************************************
14265 : This function is an alias for dfbuilderpeekprogress(), left in ALGLIB for
14266 : backward compatibility reasons.
14267 :
14268 : -- ALGLIB --
14269 : Copyright 21.05.2018 by Bochkanov Sergey
14270 : *************************************************************************/
14271 0 : double dfbuildergetprogress(const decisionforestbuilder &s, const xparams _xparams)
14272 : {
14273 : jmp_buf _break_jump;
14274 : alglib_impl::ae_state _alglib_env_state;
14275 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14276 0 : if( setjmp(_break_jump) )
14277 : {
14278 : #if !defined(AE_NO_EXCEPTIONS)
14279 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14280 : #else
14281 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14282 : return 0;
14283 : #endif
14284 : }
14285 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14286 0 : if( _xparams.flags!=0x0 )
14287 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14288 0 : double result = alglib_impl::dfbuildergetprogress(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14289 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14290 0 : return *(reinterpret_cast<double*>(&result));
14291 : }
14292 :
14293 : /*************************************************************************
14294 : This function is used to peek into the decision forest construction
14295 : process from some other thread and get the current progress indicator.
14296 :
14297 : It returns a value in [0,1].
14298 :
14299 : INPUT PARAMETERS:
14300 : S - decision forest builder object used to build forest
14301 : in some other thread
14302 :
14303 : RESULT:
14304 : progress value, in [0,1]
14305 :
14306 : -- ALGLIB --
14307 : Copyright 21.05.2018 by Bochkanov Sergey
14308 : *************************************************************************/
14309 0 : double dfbuilderpeekprogress(const decisionforestbuilder &s, const xparams _xparams)
14310 : {
14311 : jmp_buf _break_jump;
14312 : alglib_impl::ae_state _alglib_env_state;
14313 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14314 0 : if( setjmp(_break_jump) )
14315 : {
14316 : #if !defined(AE_NO_EXCEPTIONS)
14317 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14318 : #else
14319 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14320 : return 0;
14321 : #endif
14322 : }
14323 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14324 0 : if( _xparams.flags!=0x0 )
14325 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14326 0 : double result = alglib_impl::dfbuilderpeekprogress(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), &_alglib_env_state);
14327 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14328 0 : return *(reinterpret_cast<double*>(&result));
14329 : }
14330 :
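/*************************************************************************
EDITOR'S EXAMPLE (sketch): polling construction progress from a monitoring
thread while another thread runs the build on the same builder object.
*************************************************************************/
static void example_poll_progress(const alglib::decisionforestbuilder &builder)
{
    double p = alglib::dfbuilderpeekprogress(builder); // current progress, in [0,1]
    printf("forest construction: %3.0f%% done\n", 100.0*p);
}
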
14331 : /*************************************************************************
14332 : This subroutine builds a decision forest according to the current settings,
14333 : using the dataset stored in the builder object. A dense algorithm is used.
14334 :
14335 : NOTE: this function uses the dense algorithm for forest construction
14336 : regardless of the dataset format (dense or sparse).
14337 :
14338 : NOTE: a forest built with this function is stored in memory using 64-bit
14339 : data structures for offsets/indexes/split values. It is possible to
14340 : convert the forest into a more memory-efficient compressed binary
14341 : representation. Depending on the problem properties, 3.7x-5.7x
14342 : compression factors are possible.
14343 :
14344 : The downsides of compression are (a) a slight reduction in model
14345 : accuracy and (b) a ~1.5x reduction in inference speed (due to the
14346 : increased complexity of the storage format).
14347 :
14348 : See comments on dfbinarycompression() for more info.
14349 :
14350 : Default settings are used by the algorithm; you can tweak them with the
14351 : help of the following functions:
14352 : * dfbuildersetrfactor() - to control the fraction of the dataset used
14353 : for subsampling
14354 : * dfbuildersetrandomvars() - to control the number of variables randomly
14355 : chosen for decision rule creation
14356 :
14357 : ! COMMERCIAL EDITION OF ALGLIB:
14358 : !
14359 : ! Commercial Edition of ALGLIB includes following important improvements
14360 : ! of this function:
14361 : ! * high-performance native backend with same C# interface (C# version)
14362 : ! * multithreading support (C++ and C# versions)
14363 : !
14364 : ! We recommend you to read 'Working with commercial version' section of
14365 : ! ALGLIB Reference Manual in order to find out how to use performance-
14366 : ! related features provided by commercial edition of ALGLIB.
14367 :
14368 : INPUT PARAMETERS:
14369 : S - decision forest builder object
14370 : NTrees - NTrees>=1, number of trees to train
14371 :
14372 : OUTPUT PARAMETERS:
14373 : DF - decision forest. You can compress this forest to more
14374 : compact 16-bit representation with dfbinarycompression()
14375 : Rep - report, see below for information on its fields.
14376 :
14377 : === report information produced by forest construction function ==========
14378 :
14379 : The decision forest training report includes the following information:
14380 : * training set errors
14381 : * out-of-bag estimates of errors
14382 : * variable importance ratings
14383 :
14384 : Following fields are used to store information:
14385 : * training set errors are stored in rep.relclserror, rep.avgce, rep.rmserror,
14386 : rep.avgerror and rep.avgrelerror
14387 : * out-of-bag estimates of errors are stored in rep.oobrelclserror, rep.oobavgce,
14388 : rep.oobrmserror, rep.oobavgerror and rep.oobavgrelerror
14389 :
14390 : Variable importance reports, if requested by dfbuildersetimportancegini(),
14391 : dfbuildersetimportancetrngini() or dfbuildersetimportancepermutation()
14392 : call, are stored in:
14393 : * rep.varimportances field stores importance ratings
14394 : * rep.topvars stores variable indexes ordered from the most important to
14395 : less important ones
14396 :
14397 : You can find more information about report fields in:
14398 : * comments on dfreport structure
14399 : * comments on dfbuildersetimportancegini function
14400 : * comments on dfbuildersetimportancetrngini function
14401 : * comments on dfbuildersetimportancepermutation function
14402 :
14403 : -- ALGLIB --
14404 : Copyright 21.05.2018 by Bochkanov Sergey
14405 : *************************************************************************/
14406 0 : void dfbuilderbuildrandomforest(const decisionforestbuilder &s, const ae_int_t ntrees, decisionforest &df, dfreport &rep, const xparams _xparams)
14407 : {
14408 : jmp_buf _break_jump;
14409 : alglib_impl::ae_state _alglib_env_state;
14410 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14411 0 : if( setjmp(_break_jump) )
14412 : {
14413 : #if !defined(AE_NO_EXCEPTIONS)
14414 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14415 : #else
14416 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14417 : return;
14418 : #endif
14419 : }
14420 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14421 0 : if( _xparams.flags!=0x0 )
14422 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14423 0 : alglib_impl::dfbuilderbuildrandomforest(const_cast<alglib_impl::decisionforestbuilder*>(s.c_ptr()), ntrees, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
14424 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14425 0 : return;
14426 : }
14427 :
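/*************************************************************************
EDITOR'S EXAMPLE (sketch): building a forest and reading the report fields
named above. The builder is assumed to be created and to already hold a
dataset (see dfbuildercreate/dfbuildersetdataset elsewhere in this
interface).
*************************************************************************/
static void example_build_forest(const alglib::decisionforestbuilder &builder)
{
    alglib::decisionforest df;
    alglib::dfreport rep;
    alglib::dfbuilderbuildrandomforest(builder, 100, df, rep); // train 100 trees
    printf("training RMS error:   %.4f\n", double(rep.rmserror));
    printf("out-of-bag RMS error: %.4f\n", double(rep.oobrmserror));
}
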
14428 : /*************************************************************************
14429 : This function performs binary compression of the decision forest.
14430 :
14431 : The original decision forest produced by the forest builder is stored using
14432 : a 64-bit representation for all numbers - offsets, variable indexes, split
14433 : points.
14434 :
14435 : It is possible to significantly reduce the model size by means of:
14436 : * using compressed dynamic encoding for integers (offsets and variable
14437 : indexes), which uses just 1 byte to store small ints (less than 128),
14438 : just 2 bytes for larger values (less than 128^2) and so on
14439 : * storing floating point numbers using an 8-bit exponent and 16-bit mantissa
14440 :
14441 : As a result, the model needs significantly less memory (the compression
14442 : factor depends on variable and class counts). In particular:
14443 : * NVars<128 and NClasses<128 result in a 4.4x-5.7x model size reduction
14444 : * NVars<16384 and NClasses<128 result in a 3.7x-4.5x model size reduction
14445 :
14446 : Such a storage format performs lossless compression of all integers, but
14447 : compression of floating point values (split values) is lossy, with roughly
14448 : 0.01% relative error introduced during rounding. Thus, we recommend that
14449 : you re-evaluate model accuracy after compression.
14450 :
14451 : Another downside of compression is a ~1.5x reduction in inference
14452 : speed due to the necessity of dynamically decompressing the model.
14453 :
14454 : INPUT PARAMETERS:
14455 : DF - decision forest built by forest builder
14456 :
14457 : OUTPUT PARAMETERS:
14458 : DF - replaced by compressed forest
14459 :
14460 : RESULT:
14461 : compression factor (in-RAM size of the uncompressed model vs that of
14462 : the compressed one), a positive number larger than 1.0
14463 :
14464 : -- ALGLIB --
14465 : Copyright 22.07.2019 by Bochkanov Sergey
14466 : *************************************************************************/
14467 0 : double dfbinarycompression(const decisionforest &df, const xparams _xparams)
14468 : {
14469 : jmp_buf _break_jump;
14470 : alglib_impl::ae_state _alglib_env_state;
14471 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14472 0 : if( setjmp(_break_jump) )
14473 : {
14474 : #if !defined(AE_NO_EXCEPTIONS)
14475 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14476 : #else
14477 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14478 : return 0;
14479 : #endif
14480 : }
14481 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14482 0 : if( _xparams.flags!=0x0 )
14483 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14484 0 : double result = alglib_impl::dfbinarycompression(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), &_alglib_env_state);
14485 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14486 0 : return *(reinterpret_cast<double*>(&result));
14487 : }
14488 :
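/*************************************************************************
EDITOR'S EXAMPLE (sketch): compressing a trained forest in place. A returned
factor of, say, 4.5 means the compressed model takes 4.5x less RAM.
*************************************************************************/
static void example_compress(alglib::decisionforest &df)
{
    double factor = alglib::dfbinarycompression(df); // df is replaced by its compressed form
    printf("model compressed %.1fx\n", factor);
    // split values are rounded, so re-check accuracy on a test set afterwards
}
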
14489 : /*************************************************************************
14490 : Inference using decision forest
14491 :
14492 : IMPORTANT: this function is thread-unsafe and may modify internal
14493 : structures of the model! You cannot use the same model object for
14494 : parallel evaluation from several threads.
14495 :
14496 : Use dftsprocess() with independent thread-local buffers if
14497 : you need thread-safe evaluation.
14498 :
14499 : INPUT PARAMETERS:
14500 : DF - decision forest model
14501 : X - input vector, array[NVars]
14502 : Y - possibly preallocated buffer, reallocated if too small
14503 :
14504 : OUTPUT PARAMETERS:
14505 : Y - result. Regression estimate when solving regression task,
14506 : vector of posterior probabilities for classification task.
14507 :
14508 : See also DFProcessI.
14509 :
14510 :
14511 : -- ALGLIB --
14512 : Copyright 16.02.2009 by Bochkanov Sergey
14513 : *************************************************************************/
14514 0 : void dfprocess(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
14515 : {
14516 : jmp_buf _break_jump;
14517 : alglib_impl::ae_state _alglib_env_state;
14518 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14519 0 : if( setjmp(_break_jump) )
14520 : {
14521 : #if !defined(AE_NO_EXCEPTIONS)
14522 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14523 : #else
14524 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14525 : return;
14526 : #endif
14527 : }
14528 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14529 0 : if( _xparams.flags!=0x0 )
14530 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14531 0 : alglib_impl::dfprocess(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
14532 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14533 0 : return;
14534 : }
14535 :
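/*************************************************************************
EDITOR'S EXAMPLE (sketch): plain single-threaded inference. For a 2-class
model Y receives two posterior probabilities; for 1-dimensional regression
it receives a single estimate. The input values are assumptions.
*************************************************************************/
static void example_process(const alglib::decisionforest &df)
{
    alglib::real_1d_array x = "[0.0,1.0]";
    alglib::real_1d_array y;      // reallocated by dfprocess() if too small
    alglib::dfprocess(df, x, y);
    printf("y[0]=%.4f\n", double(y[0]));
}
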
14536 : /*************************************************************************
14537 : 'Interactive' variant of DFProcess for languages like Python which support
14538 : constructs like "Y = DFProcessI(DF,X)" and the interactive interpreter mode.
14539 :
14540 : This function allocates a new array on each call, so it is significantly
14541 : slower than its 'non-interactive' counterpart, but it is more convenient
14542 : when you call it from the command line.
14543 :
14544 : IMPORTANT: this function is thread-unsafe and may modify internal
14545 : structures of the model! You cannot use the same model object for
14546 : parallel evaluation from several threads.
14547 :
14548 : Use dftsprocess() with independent thread-local buffers if
14549 : you need thread-safe evaluation.
14550 :
14551 : -- ALGLIB --
14552 : Copyright 28.02.2010 by Bochkanov Sergey
14553 : *************************************************************************/
14554 0 : void dfprocessi(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
14555 : {
14556 : jmp_buf _break_jump;
14557 : alglib_impl::ae_state _alglib_env_state;
14558 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14559 0 : if( setjmp(_break_jump) )
14560 : {
14561 : #if !defined(AE_NO_EXCEPTIONS)
14562 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14563 : #else
14564 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14565 : return;
14566 : #endif
14567 : }
14568 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14569 0 : if( _xparams.flags!=0x0 )
14570 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14571 0 : alglib_impl::dfprocessi(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
14572 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14573 0 : return;
14574 : }
14575 :
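/*************************************************************************
EDITOR'S EXAMPLE (sketch): the 'interactive' variant; unlike dfprocess(),
it allocates a fresh output array on every call.
*************************************************************************/
static void example_processi(const alglib::decisionforest &df, const alglib::real_1d_array &x)
{
    alglib::real_1d_array y;
    alglib::dfprocessi(df, x, y); // y is freshly allocated on each call
}
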
14576 : /*************************************************************************
14577 : This function returns first component of the inferred vector (i.e. one
14578 : with index #0).
14579 :
14580 : It is a convenience wrapper for dfprocess() intended for either:
14581 : * 1-dimensional regression problems
14582 : * 2-class classification problems
14583 :
14584 : In the former case this function returns the inference result as a scalar,
14585 : which is definitely more convenient than wrapping it as a vector. In the
14586 : latter case it returns the probability of the object belonging to class #0.
14587 :
14588 : If you call it for anything different from the two cases above, it will work
14589 : as defined, i.e. return y[0], although it is of less use in such cases.
14590 :
14591 : IMPORTANT: this function is thread-unsafe and modifies internal structures
14592 : of the model! You cannot use the same model object for parallel
14593 : evaluation from several threads.
14594 :
14595 : Use dftsprocess() with independent thread-local buffers, if
14596 : you need thread-safe evaluation.
14597 :
14598 : INPUT PARAMETERS:
14599 : Model - DF model
14600 : X - input vector, array[0..NVars-1].
14601 :
14602 : RESULT:
14603 : Y[0]
14604 :
14605 : -- ALGLIB --
14606 : Copyright 15.02.2019 by Bochkanov Sergey
14607 : *************************************************************************/
14608 0 : double dfprocess0(const decisionforest &model, const real_1d_array &x, const xparams _xparams)
14609 : {
14610 : jmp_buf _break_jump;
14611 : alglib_impl::ae_state _alglib_env_state;
14612 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14613 0 : if( setjmp(_break_jump) )
14614 : {
14615 : #if !defined(AE_NO_EXCEPTIONS)
14616 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14617 : #else
14618 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14619 : return 0;
14620 : #endif
14621 : }
14622 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14623 0 : if( _xparams.flags!=0x0 )
14624 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14625 0 : double result = alglib_impl::dfprocess0(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
14626 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14627 0 : return *(reinterpret_cast<double*>(&result));
14628 : }
14629 :
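/*************************************************************************
EDITOR'S EXAMPLE (sketch): the scalar convenience wrapper. For a 2-class
model the returned value is the posterior probability of class #0.
*************************************************************************/
static void example_process0(const alglib::decisionforest &df, const alglib::real_1d_array &x)
{
    double p0 = alglib::dfprocess0(df, x); // same as dfprocess() followed by reading y[0]
    printf("P(class #0) = %.4f\n", p0);
}
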
14630 : /*************************************************************************
14631 : This function returns the most probable class number for an input X. It
14632 : is the same as calling dfprocess(model,x,y), then determining i=argmax(y[i])
14633 : and returning i.
14634 :
14635 : A class number in the [0,NOut) range is returned for classification
14636 : problems; -1 is returned for regression problems.
14637 :
14638 : IMPORTANT: this function is thread-unsafe and modifies internal structures
14639 : of the model! You cannot use the same model object for parallel
14640 : evaluation from several threads.
14641 :
14642 : Use dftsprocess() with independent thread-local buffers, if
14643 : you need thread-safe evaluation.
14644 :
14645 : INPUT PARAMETERS:
14646 : Model - decision forest model
14647 : X - input vector, array[0..NVars-1].
14648 :
14649 : RESULT:
14650 : class number, -1 for regression tasks
14651 :
14652 : -- ALGLIB --
14653 : Copyright 15.02.2019 by Bochkanov Sergey
14654 : *************************************************************************/
14655 0 : ae_int_t dfclassify(const decisionforest &model, const real_1d_array &x, const xparams _xparams)
14656 : {
14657 : jmp_buf _break_jump;
14658 : alglib_impl::ae_state _alglib_env_state;
14659 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14660 0 : if( setjmp(_break_jump) )
14661 : {
14662 : #if !defined(AE_NO_EXCEPTIONS)
14663 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14664 : #else
14665 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14666 : return 0;
14667 : #endif
14668 : }
14669 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14670 0 : if( _xparams.flags!=0x0 )
14671 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14672 0 : alglib_impl::ae_int_t result = alglib_impl::dfclassify(const_cast<alglib_impl::decisionforest*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
14673 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14674 0 : return *(reinterpret_cast<ae_int_t*>(&result));
14675 : }
14676 :
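/*************************************************************************
EDITOR'S EXAMPLE (sketch): retrieving the most probable class label.
*************************************************************************/
static void example_classify(const alglib::decisionforest &df, const alglib::real_1d_array &x)
{
    alglib::ae_int_t cls = alglib::dfclassify(df, x); // argmax of posteriors; -1 for regression
    printf("predicted class: %d\n", (int)cls);
}
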
14677 : /*************************************************************************
14678 : Inference using decision forest
14679 :
14680 : Thread-safe processing using an external buffer for temporaries.
14681 :
14682 : This function is thread-safe (i.e. you can use the same DF model from
14683 : multiple threads) as long as you use different buffer objects for
14684 : different threads.
14685 :
14686 : INPUT PARAMETERS:
14687 : DF - decision forest model
14688 : Buf - buffer object, must be allocated specifically for this
14689 : model with dfcreatebuffer().
14690 : X - input vector, array[NVars]
14691 : Y - possibly preallocated buffer, reallocated if too small
14692 :
14693 : OUTPUT PARAMETERS:
14694 : Y - result. Regression estimate when solving regression task,
14695 : vector of posterior probabilities for classification task.
14696 :
14697 : See also DFProcessI.
14698 :
14699 :
14700 : -- ALGLIB --
14701 : Copyright 16.02.2009 by Bochkanov Sergey
14702 : *************************************************************************/
14703 0 : void dftsprocess(const decisionforest &df, const decisionforestbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
14704 : {
14705 : jmp_buf _break_jump;
14706 : alglib_impl::ae_state _alglib_env_state;
14707 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14708 0 : if( setjmp(_break_jump) )
14709 : {
14710 : #if !defined(AE_NO_EXCEPTIONS)
14711 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14712 : #else
14713 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14714 : return;
14715 : #endif
14716 : }
14717 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14718 0 : if( _xparams.flags!=0x0 )
14719 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14720 0 : alglib_impl::dftsprocess(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::decisionforestbuffer*>(buf.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
14721 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14722 0 : return;
14723 : }
14724 :
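/*************************************************************************
EDITOR'S EXAMPLE (sketch): thread-safe inference. Each worker thread owns
one buffer created with dfcreatebuffer() (documented elsewhere in this
interface); the model itself is shared read-only between threads.
*************************************************************************/
static void example_tsprocess(const alglib::decisionforest &df, const alglib::real_1d_array &x)
{
    alglib::decisionforestbuffer buf;   // one such buffer per thread
    alglib::dfcreatebuffer(df, buf);
    alglib::real_1d_array y;
    alglib::dftsprocess(df, buf, x, y); // safe to run concurrently with other buffers
}
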
14725 : /*************************************************************************
14726 : Relative classification error on the test set
14727 :
14728 : INPUT PARAMETERS:
14729 : DF - decision forest model
14730 : XY - test set
14731 : NPoints - test set size
14732 :
14733 : RESULT:
14734 : percent of incorrectly classified cases.
14735 : Zero if the model solves a regression task.
14736 :
14737 : -- ALGLIB --
14738 : Copyright 16.02.2009 by Bochkanov Sergey
14739 : *************************************************************************/
14740 0 : double dfrelclserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
14741 : {
14742 : jmp_buf _break_jump;
14743 : alglib_impl::ae_state _alglib_env_state;
14744 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14745 0 : if( setjmp(_break_jump) )
14746 : {
14747 : #if !defined(AE_NO_EXCEPTIONS)
14748 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14749 : #else
14750 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14751 : return 0;
14752 : #endif
14753 : }
14754 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14755 0 : if( _xparams.flags!=0x0 )
14756 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14757 0 : double result = alglib_impl::dfrelclserror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
14758 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14759 0 : return *(reinterpret_cast<double*>(&result));
14760 : }
14761 :
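/*************************************************************************
EDITOR'S EXAMPLE (sketch): evaluating test-set errors. Rows of XY follow
the training layout (inputs first, then the class label); the values are
assumptions. dfrmserror() is documented below.
*************************************************************************/
static void example_testset_errors(const alglib::decisionforest &df)
{
    alglib::real_2d_array testxy = "[[0,0,0],[0,1,1],[1,0,1]]";
    double clserr = alglib::dfrelclserror(df, testxy, 3); // misclassification rate
    double rms    = alglib::dfrmserror(df, testxy, 3);    // RMS error on posteriors
    printf("cls err: %.3f, rms: %.3f\n", clserr, rms);
}
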
14762 : /*************************************************************************
14763 : Average cross-entropy (in bits per element) on the test set
14764 :
14765 : INPUT PARAMETERS:
14766 : DF - decision forest model
14767 : XY - test set
14768 : NPoints - test set size
14769 :
14770 : RESULT:
14771 : CrossEntropy/(NPoints*LN(2)).
14772 : Zero if model solves regression task.
14773 :
14774 : -- ALGLIB --
14775 : Copyright 16.02.2009 by Bochkanov Sergey
14776 : *************************************************************************/
14777 0 : double dfavgce(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
14778 : {
14779 : jmp_buf _break_jump;
14780 : alglib_impl::ae_state _alglib_env_state;
14781 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14782 0 : if( setjmp(_break_jump) )
14783 : {
14784 : #if !defined(AE_NO_EXCEPTIONS)
14785 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14786 : #else
14787 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14788 : return 0;
14789 : #endif
14790 : }
14791 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14792 0 : if( _xparams.flags!=0x0 )
14793 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14794 0 : double result = alglib_impl::dfavgce(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
14795 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14796 0 : return *(reinterpret_cast<double*>(&result));
14797 : }
14798 :
14799 : /*************************************************************************
14800 : RMS error on the test set
14801 :
14802 : INPUT PARAMETERS:
14803 : DF - decision forest model
14804 : XY - test set
14805 : NPoints - test set size
14806 :
14807 : RESULT:
14808 : root mean square error.
14809 : Its meaning for regression tasks is obvious. As for
14810 : classification tasks, the RMS error is the error in estimating posterior
14811 : probabilities.
14812 :
14813 : -- ALGLIB --
14814 : Copyright 16.02.2009 by Bochkanov Sergey
14815 : *************************************************************************/
14816 0 : double dfrmserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
14817 : {
14818 : jmp_buf _break_jump;
14819 : alglib_impl::ae_state _alglib_env_state;
14820 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14821 0 : if( setjmp(_break_jump) )
14822 : {
14823 : #if !defined(AE_NO_EXCEPTIONS)
14824 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14825 : #else
14826 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14827 : return 0;
14828 : #endif
14829 : }
14830 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14831 0 : if( _xparams.flags!=0x0 )
14832 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14833 0 : double result = alglib_impl::dfrmserror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
14834 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14835 0 : return *(reinterpret_cast<double*>(&result));
14836 : }
14837 :
14838 : /*************************************************************************
14839 : Average error on the test set
14840 :
14841 : INPUT PARAMETERS:
14842 : DF - decision forest model
14843 : XY - test set
14844 : NPoints - test set size
14845 :
14846 : RESULT:
14847 : Its meaning for regression tasks is obvious. As for
14848 : classification tasks, it means the average error in estimating posterior
14849 : probabilities.
14850 :
14851 : -- ALGLIB --
14852 : Copyright 16.02.2009 by Bochkanov Sergey
14853 : *************************************************************************/
14854 0 : double dfavgerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
14855 : {
14856 : jmp_buf _break_jump;
14857 : alglib_impl::ae_state _alglib_env_state;
14858 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14859 0 : if( setjmp(_break_jump) )
14860 : {
14861 : #if !defined(AE_NO_EXCEPTIONS)
14862 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14863 : #else
14864 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14865 : return 0;
14866 : #endif
14867 : }
14868 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14869 0 : if( _xparams.flags!=0x0 )
14870 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14871 0 : double result = alglib_impl::dfavgerror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
14872 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14873 0 : return *(reinterpret_cast<double*>(&result));
14874 : }
14875 :
14876 : /*************************************************************************
14877 : Average relative error on the test set
14878 :
14879 : INPUT PARAMETERS:
14880 : DF - decision forest model
14881 : XY - test set
14882 : NPoints - test set size
14883 :
14884 : RESULT:
14885 : Its meaning for regression tasks is obvious. As for
14886 : classification tasks, it means the average relative error in estimating
14887 : the posterior probability of belonging to the correct class.
14888 :
14889 : -- ALGLIB --
14890 : Copyright 16.02.2009 by Bochkanov Sergey
14891 : *************************************************************************/
14892 0 : double dfavgrelerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
14893 : {
14894 : jmp_buf _break_jump;
14895 : alglib_impl::ae_state _alglib_env_state;
14896 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14897 0 : if( setjmp(_break_jump) )
14898 : {
14899 : #if !defined(AE_NO_EXCEPTIONS)
14900 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14901 : #else
14902 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14903 : return 0;
14904 : #endif
14905 : }
14906 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14907 0 : if( _xparams.flags!=0x0 )
14908 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14909 0 : double result = alglib_impl::dfavgrelerror(const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
14910 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14911 0 : return *(reinterpret_cast<double*>(&result));
14912 : }
14913 :
14914 : /*************************************************************************
14915 : This subroutine builds a random decision forest.
14916 :
14917 : --------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------
14918 :
14919 : -- ALGLIB --
14920 : Copyright 19.02.2009 by Bochkanov Sergey
14921 : *************************************************************************/
14922 0 : void dfbuildrandomdecisionforest(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams)
14923 : {
14924 : jmp_buf _break_jump;
14925 : alglib_impl::ae_state _alglib_env_state;
14926 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14927 0 : if( setjmp(_break_jump) )
14928 : {
14929 : #if !defined(AE_NO_EXCEPTIONS)
14930 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14931 : #else
14932 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14933 : return;
14934 : #endif
14935 : }
14936 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14937 0 : if( _xparams.flags!=0x0 )
14938 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14939 0 : alglib_impl::dfbuildrandomdecisionforest(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, ntrees, r, &info, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
14940 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14941 0 : return;
14942 : }
14943 :
14944 : /*************************************************************************
14945 : This subroutine builds a random decision forest.
14946 :
14947 : --------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------
14948 :
14949 : -- ALGLIB --
14950 : Copyright 19.02.2009 by Bochkanov Sergey
14951 : *************************************************************************/
14952 0 : void dfbuildrandomdecisionforestx1(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const ae_int_t nrndvars, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams)
14953 : {
14954 : jmp_buf _break_jump;
14955 : alglib_impl::ae_state _alglib_env_state;
14956 0 : alglib_impl::ae_state_init(&_alglib_env_state);
14957 0 : if( setjmp(_break_jump) )
14958 : {
14959 : #if !defined(AE_NO_EXCEPTIONS)
14960 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
14961 : #else
14962 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
14963 : return;
14964 : #endif
14965 : }
14966 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
14967 0 : if( _xparams.flags!=0x0 )
14968 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
14969 0 : alglib_impl::dfbuildrandomdecisionforestx1(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, ntrees, nrndvars, r, &info, const_cast<alglib_impl::decisionforest*>(df.c_ptr()), const_cast<alglib_impl::dfreport*>(rep.c_ptr()), &_alglib_env_state);
14970 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
14971 0 : return;
14972 : }
14973 : #endif
14974 :
14975 : #if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
14976 : /*************************************************************************
14977 : Buffer object which is used to perform various requests (usually model
14978 : inference) in multithreaded mode (multiple threads working with the same
14979 : KNN object).
14980 :
14981 : This object should be created with KNNCreateBuffer().
14982 : *************************************************************************/
14983 0 : _knnbuffer_owner::_knnbuffer_owner()
14984 : {
14985 : jmp_buf _break_jump;
14986 : alglib_impl::ae_state _state;
14987 :
14988 0 : alglib_impl::ae_state_init(&_state);
14989 0 : if( setjmp(_break_jump) )
14990 : {
14991 0 : if( p_struct!=NULL )
14992 : {
14993 0 : alglib_impl::_knnbuffer_destroy(p_struct);
14994 0 : alglib_impl::ae_free(p_struct);
14995 : }
14996 0 : p_struct = NULL;
14997 : #if !defined(AE_NO_EXCEPTIONS)
14998 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
14999 : #else
15000 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15001 : return;
15002 : #endif
15003 : }
15004 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15005 0 : p_struct = NULL;
15006 0 : p_struct = (alglib_impl::knnbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuffer), &_state);
15007 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
15008 0 : alglib_impl::_knnbuffer_init(p_struct, &_state, ae_false);
15009 0 : ae_state_clear(&_state);
15010 0 : }
15011 :
15012 0 : _knnbuffer_owner::_knnbuffer_owner(const _knnbuffer_owner &rhs)
15013 : {
15014 : jmp_buf _break_jump;
15015 : alglib_impl::ae_state _state;
15016 :
15017 0 : alglib_impl::ae_state_init(&_state);
15018 0 : if( setjmp(_break_jump) )
15019 : {
15020 0 : if( p_struct!=NULL )
15021 : {
15022 0 : alglib_impl::_knnbuffer_destroy(p_struct);
15023 0 : alglib_impl::ae_free(p_struct);
15024 : }
15025 0 : p_struct = NULL;
15026 : #if !defined(AE_NO_EXCEPTIONS)
15027 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15028 : #else
15029 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15030 : return;
15031 : #endif
15032 : }
15033 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15034 0 : p_struct = NULL;
15035 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuffer copy constructor failure (source is not initialized)", &_state);
15036 0 : p_struct = (alglib_impl::knnbuffer*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuffer), &_state);
15037 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
15038 0 : alglib_impl::_knnbuffer_init_copy(p_struct, const_cast<alglib_impl::knnbuffer*>(rhs.p_struct), &_state, ae_false);
15039 0 : ae_state_clear(&_state);
15040 0 : }
15041 :
15042 0 : _knnbuffer_owner& _knnbuffer_owner::operator=(const _knnbuffer_owner &rhs)
15043 : {
15044 0 : if( this==&rhs )
15045 0 : return *this;
15046 : jmp_buf _break_jump;
15047 : alglib_impl::ae_state _state;
15048 :
15049 0 : alglib_impl::ae_state_init(&_state);
15050 0 : if( setjmp(_break_jump) )
15051 : {
15052 : #if !defined(AE_NO_EXCEPTIONS)
15053 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15054 : #else
15055 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15056 : return *this;
15057 : #endif
15058 : }
15059 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15060 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnbuffer assignment constructor failure (destination is not initialized)", &_state);
15061 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuffer assignment constructor failure (source is not initialized)", &_state);
15062 0 : alglib_impl::_knnbuffer_destroy(p_struct);
15063 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuffer));
15064 0 : alglib_impl::_knnbuffer_init_copy(p_struct, const_cast<alglib_impl::knnbuffer*>(rhs.p_struct), &_state, ae_false);
15065 0 : ae_state_clear(&_state);
15066 0 : return *this;
15067 : }
15068 :
15069 0 : _knnbuffer_owner::~_knnbuffer_owner()
15070 : {
15071 0 : if( p_struct!=NULL )
15072 : {
15073 0 : alglib_impl::_knnbuffer_destroy(p_struct);
15074 0 : ae_free(p_struct);
15075 : }
15076 0 : }
15077 :
15078 0 : alglib_impl::knnbuffer* _knnbuffer_owner::c_ptr()
15079 : {
15080 0 : return p_struct;
15081 : }
15082 :
15083 0 : alglib_impl::knnbuffer* _knnbuffer_owner::c_ptr() const
15084 : {
15085 0 : return const_cast<alglib_impl::knnbuffer*>(p_struct);
15086 : }
15087 0 : knnbuffer::knnbuffer() : _knnbuffer_owner()
15088 : {
15089 0 : }
15090 :
15091 0 : knnbuffer::knnbuffer(const knnbuffer &rhs):_knnbuffer_owner(rhs)
15092 : {
15093 0 : }
15094 :
15095 0 : knnbuffer& knnbuffer::operator=(const knnbuffer &rhs)
15096 : {
15097 0 : if( this==&rhs )
15098 0 : return *this;
15099 0 : _knnbuffer_owner::operator=(rhs);
15100 0 : return *this;
15101 : }
15102 :
15103 0 : knnbuffer::~knnbuffer()
15104 : {
15105 0 : }
15106 :
15107 :
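/*************************************************************************
EDITOR'S EXAMPLE (sketch): per-thread KNN buffers. knncreatebuffer() and
knntsprocess() are part of this interface (defined elsewhere in this file);
the model is assumed to be already trained.
*************************************************************************/
static void example_knn_buffer(const alglib::knnmodel &model, const alglib::real_1d_array &x)
{
    alglib::knnbuffer buf;                  // one buffer per worker thread
    alglib::knncreatebuffer(model, buf);
    alglib::real_1d_array y;
    alglib::knntsprocess(model, buf, x, y); // thread-safe inference
}
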
15108 : /*************************************************************************
15109 : A KNN builder object; it encapsulates the dataset and all related
15110 : settings and is used to create an actual instance of the KNN model.
15111 : *************************************************************************/
15112 0 : _knnbuilder_owner::_knnbuilder_owner()
15113 : {
15114 : jmp_buf _break_jump;
15115 : alglib_impl::ae_state _state;
15116 :
15117 0 : alglib_impl::ae_state_init(&_state);
15118 0 : if( setjmp(_break_jump) )
15119 : {
15120 0 : if( p_struct!=NULL )
15121 : {
15122 0 : alglib_impl::_knnbuilder_destroy(p_struct);
15123 0 : alglib_impl::ae_free(p_struct);
15124 : }
15125 0 : p_struct = NULL;
15126 : #if !defined(AE_NO_EXCEPTIONS)
15127 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15128 : #else
15129 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15130 : return;
15131 : #endif
15132 : }
15133 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15134 0 : p_struct = NULL;
15135 0 : p_struct = (alglib_impl::knnbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuilder), &_state);
15136 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
15137 0 : alglib_impl::_knnbuilder_init(p_struct, &_state, ae_false);
15138 0 : ae_state_clear(&_state);
15139 0 : }
15140 :
15141 0 : _knnbuilder_owner::_knnbuilder_owner(const _knnbuilder_owner &rhs)
15142 : {
15143 : jmp_buf _break_jump;
15144 : alglib_impl::ae_state _state;
15145 :
15146 0 : alglib_impl::ae_state_init(&_state);
15147 0 : if( setjmp(_break_jump) )
15148 : {
15149 0 : if( p_struct!=NULL )
15150 : {
15151 0 : alglib_impl::_knnbuilder_destroy(p_struct);
15152 0 : alglib_impl::ae_free(p_struct);
15153 : }
15154 0 : p_struct = NULL;
15155 : #if !defined(AE_NO_EXCEPTIONS)
15156 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15157 : #else
15158 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15159 : return;
15160 : #endif
15161 : }
15162 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15163 0 : p_struct = NULL;
15164 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuilder copy constructor failure (source is not initialized)", &_state);
15165 0 : p_struct = (alglib_impl::knnbuilder*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnbuilder), &_state);
15166 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
15167 0 : alglib_impl::_knnbuilder_init_copy(p_struct, const_cast<alglib_impl::knnbuilder*>(rhs.p_struct), &_state, ae_false);
15168 0 : ae_state_clear(&_state);
15169 0 : }
15170 :
15171 0 : _knnbuilder_owner& _knnbuilder_owner::operator=(const _knnbuilder_owner &rhs)
15172 : {
15173 0 : if( this==&rhs )
15174 0 : return *this;
15175 : jmp_buf _break_jump;
15176 : alglib_impl::ae_state _state;
15177 :
15178 0 : alglib_impl::ae_state_init(&_state);
15179 0 : if( setjmp(_break_jump) )
15180 : {
15181 : #if !defined(AE_NO_EXCEPTIONS)
15182 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15183 : #else
15184 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15185 : return *this;
15186 : #endif
15187 : }
15188 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15189 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnbuilder assignment constructor failure (destination is not initialized)", &_state);
15190 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnbuilder assignment constructor failure (source is not initialized)", &_state);
15191 0 : alglib_impl::_knnbuilder_destroy(p_struct);
15192 0 : memset(p_struct, 0, sizeof(alglib_impl::knnbuilder));
15193 0 : alglib_impl::_knnbuilder_init_copy(p_struct, const_cast<alglib_impl::knnbuilder*>(rhs.p_struct), &_state, ae_false);
15194 0 : ae_state_clear(&_state);
15195 0 : return *this;
15196 : }
15197 :
15198 0 : _knnbuilder_owner::~_knnbuilder_owner()
15199 : {
15200 0 : if( p_struct!=NULL )
15201 : {
15202 0 : alglib_impl::_knnbuilder_destroy(p_struct);
15203 0 : ae_free(p_struct);
15204 : }
15205 0 : }
15206 :
15207 0 : alglib_impl::knnbuilder* _knnbuilder_owner::c_ptr()
15208 : {
15209 0 : return p_struct;
15210 : }
15211 :
15212 0 : alglib_impl::knnbuilder* _knnbuilder_owner::c_ptr() const
15213 : {
15214 0 : return const_cast<alglib_impl::knnbuilder*>(p_struct);
15215 : }
15216 0 : knnbuilder::knnbuilder() : _knnbuilder_owner()
15217 : {
15218 0 : }
15219 :
15220 0 : knnbuilder::knnbuilder(const knnbuilder &rhs):_knnbuilder_owner(rhs)
15221 : {
15222 0 : }
15223 :
15224 0 : knnbuilder& knnbuilder::operator=(const knnbuilder &rhs)
15225 : {
15226 0 : if( this==&rhs )
15227 0 : return *this;
15228 0 : _knnbuilder_owner::operator=(rhs);
15229 0 : return *this;
15230 : }
15231 :
15232 0 : knnbuilder::~knnbuilder()
15233 : {
15234 0 : }
15235 :
15236 :
15237 : /*************************************************************************
15238 : KNN model; it can be used for classification or regression
15239 : *************************************************************************/
15240 0 : _knnmodel_owner::_knnmodel_owner()
15241 : {
15242 : jmp_buf _break_jump;
15243 : alglib_impl::ae_state _state;
15244 :
15245 0 : alglib_impl::ae_state_init(&_state);
15246 0 : if( setjmp(_break_jump) )
15247 : {
15248 0 : if( p_struct!=NULL )
15249 : {
15250 0 : alglib_impl::_knnmodel_destroy(p_struct);
15251 0 : alglib_impl::ae_free(p_struct);
15252 : }
15253 0 : p_struct = NULL;
15254 : #if !defined(AE_NO_EXCEPTIONS)
15255 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15256 : #else
15257 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15258 : return;
15259 : #endif
15260 : }
15261 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15262 0 : p_struct = NULL;
15263 0 : p_struct = (alglib_impl::knnmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnmodel), &_state);
15264 0 : memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
15265 0 : alglib_impl::_knnmodel_init(p_struct, &_state, ae_false);
15266 0 : ae_state_clear(&_state);
15267 0 : }
15268 :
15269 0 : _knnmodel_owner::_knnmodel_owner(const _knnmodel_owner &rhs)
15270 : {
15271 : jmp_buf _break_jump;
15272 : alglib_impl::ae_state _state;
15273 :
15274 0 : alglib_impl::ae_state_init(&_state);
15275 0 : if( setjmp(_break_jump) )
15276 : {
15277 0 : if( p_struct!=NULL )
15278 : {
15279 0 : alglib_impl::_knnmodel_destroy(p_struct);
15280 0 : alglib_impl::ae_free(p_struct);
15281 : }
15282 0 : p_struct = NULL;
15283 : #if !defined(AE_NO_EXCEPTIONS)
15284 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15285 : #else
15286 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15287 : return;
15288 : #endif
15289 : }
15290 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15291 0 : p_struct = NULL;
15292 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnmodel copy constructor failure (source is not initialized)", &_state);
15293 0 : p_struct = (alglib_impl::knnmodel*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnmodel), &_state);
15294 0 : memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
15295 0 : alglib_impl::_knnmodel_init_copy(p_struct, const_cast<alglib_impl::knnmodel*>(rhs.p_struct), &_state, ae_false);
15296 0 : ae_state_clear(&_state);
15297 0 : }
15298 :
15299 0 : _knnmodel_owner& _knnmodel_owner::operator=(const _knnmodel_owner &rhs)
15300 : {
15301 0 : if( this==&rhs )
15302 0 : return *this;
15303 : jmp_buf _break_jump;
15304 : alglib_impl::ae_state _state;
15305 :
15306 0 : alglib_impl::ae_state_init(&_state);
15307 0 : if( setjmp(_break_jump) )
15308 : {
15309 : #if !defined(AE_NO_EXCEPTIONS)
15310 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15311 : #else
15312 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15313 : return *this;
15314 : #endif
15315 : }
15316 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15317 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnmodel assignment constructor failure (destination is not initialized)", &_state);
15318 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnmodel assignment constructor failure (source is not initialized)", &_state);
15319 0 : alglib_impl::_knnmodel_destroy(p_struct);
15320 0 : memset(p_struct, 0, sizeof(alglib_impl::knnmodel));
15321 0 : alglib_impl::_knnmodel_init_copy(p_struct, const_cast<alglib_impl::knnmodel*>(rhs.p_struct), &_state, ae_false);
15322 0 : ae_state_clear(&_state);
15323 0 : return *this;
15324 : }
15325 :
15326 0 : _knnmodel_owner::~_knnmodel_owner()
15327 : {
15328 0 : if( p_struct!=NULL )
15329 : {
15330 0 : alglib_impl::_knnmodel_destroy(p_struct);
15331 0 : ae_free(p_struct);
15332 : }
15333 0 : }
15334 :
15335 0 : alglib_impl::knnmodel* _knnmodel_owner::c_ptr()
15336 : {
15337 0 : return p_struct;
15338 : }
15339 :
15340 0 : alglib_impl::knnmodel* _knnmodel_owner::c_ptr() const
15341 : {
15342 0 : return const_cast<alglib_impl::knnmodel*>(p_struct);
15343 : }
15344 0 : knnmodel::knnmodel() : _knnmodel_owner()
15345 : {
15346 0 : }
15347 :
15348 0 : knnmodel::knnmodel(const knnmodel &rhs):_knnmodel_owner(rhs)
15349 : {
15350 0 : }
15351 :
15352 0 : knnmodel& knnmodel::operator=(const knnmodel &rhs)
15353 : {
15354 0 : if( this==&rhs )
15355 0 : return *this;
15356 0 : _knnmodel_owner::operator=(rhs);
15357 0 : return *this;
15358 : }
15359 :
15360 0 : knnmodel::~knnmodel()
15361 : {
15362 0 : }
15363 :
15364 :
15365 : /*************************************************************************
15366 : KNN training report.
15367 :
15368 : The following fields store training set errors:
15369 : * relclserror - fraction of misclassified cases, [0,1]
15370 : * avgce - average cross-entropy in bits per symbol
15371 : * rmserror - root-mean-square error
15372 : * avgerror - average error
15373 : * avgrelerror - average relative error
15374 :
15375 : For classification problems:
15376 : * RMS, AVG and AVGREL errors are calculated for posterior probabilities
15377 :
15378 : For regression problems:
15379 : * RELCLS and AVGCE errors are zero
15380 : *************************************************************************/
15381 0 : _knnreport_owner::_knnreport_owner()
15382 : {
15383 : jmp_buf _break_jump;
15384 : alglib_impl::ae_state _state;
15385 :
15386 0 : alglib_impl::ae_state_init(&_state);
15387 0 : if( setjmp(_break_jump) )
15388 : {
15389 0 : if( p_struct!=NULL )
15390 : {
15391 0 : alglib_impl::_knnreport_destroy(p_struct);
15392 0 : alglib_impl::ae_free(p_struct);
15393 : }
15394 0 : p_struct = NULL;
15395 : #if !defined(AE_NO_EXCEPTIONS)
15396 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15397 : #else
15398 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15399 : return;
15400 : #endif
15401 : }
15402 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15403 0 : p_struct = NULL;
15404 0 : p_struct = (alglib_impl::knnreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnreport), &_state);
15405 0 : memset(p_struct, 0, sizeof(alglib_impl::knnreport));
15406 0 : alglib_impl::_knnreport_init(p_struct, &_state, ae_false);
15407 0 : ae_state_clear(&_state);
15408 0 : }
15409 :
15410 0 : _knnreport_owner::_knnreport_owner(const _knnreport_owner &rhs)
15411 : {
15412 : jmp_buf _break_jump;
15413 : alglib_impl::ae_state _state;
15414 :
15415 0 : alglib_impl::ae_state_init(&_state);
15416 0 : if( setjmp(_break_jump) )
15417 : {
15418 0 : if( p_struct!=NULL )
15419 : {
15420 0 : alglib_impl::_knnreport_destroy(p_struct);
15421 0 : alglib_impl::ae_free(p_struct);
15422 : }
15423 0 : p_struct = NULL;
15424 : #if !defined(AE_NO_EXCEPTIONS)
15425 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15426 : #else
15427 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15428 : return;
15429 : #endif
15430 : }
15431 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15432 0 : p_struct = NULL;
15433 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnreport copy constructor failure (source is not initialized)", &_state);
15434 0 : p_struct = (alglib_impl::knnreport*)alglib_impl::ae_malloc(sizeof(alglib_impl::knnreport), &_state);
15435 0 : memset(p_struct, 0, sizeof(alglib_impl::knnreport));
15436 0 : alglib_impl::_knnreport_init_copy(p_struct, const_cast<alglib_impl::knnreport*>(rhs.p_struct), &_state, ae_false);
15437 0 : ae_state_clear(&_state);
15438 0 : }
15439 :
15440 0 : _knnreport_owner& _knnreport_owner::operator=(const _knnreport_owner &rhs)
15441 : {
15442 0 : if( this==&rhs )
15443 0 : return *this;
15444 : jmp_buf _break_jump;
15445 : alglib_impl::ae_state _state;
15446 :
15447 0 : alglib_impl::ae_state_init(&_state);
15448 0 : if( setjmp(_break_jump) )
15449 : {
15450 : #if !defined(AE_NO_EXCEPTIONS)
15451 0 : _ALGLIB_CPP_EXCEPTION(_state.error_msg);
15452 : #else
15453 : _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
15454 : return *this;
15455 : #endif
15456 : }
15457 0 : alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
15458 0 : alglib_impl::ae_assert(p_struct!=NULL, "ALGLIB: knnreport assignment constructor failure (destination is not initialized)", &_state);
15459 0 : alglib_impl::ae_assert(rhs.p_struct!=NULL, "ALGLIB: knnreport assignment constructor failure (source is not initialized)", &_state);
15460 0 : alglib_impl::_knnreport_destroy(p_struct);
15461 0 : memset(p_struct, 0, sizeof(alglib_impl::knnreport));
15462 0 : alglib_impl::_knnreport_init_copy(p_struct, const_cast<alglib_impl::knnreport*>(rhs.p_struct), &_state, ae_false);
15463 0 : ae_state_clear(&_state);
15464 0 : return *this;
15465 : }
15466 :
15467 0 : _knnreport_owner::~_knnreport_owner()
15468 : {
15469 0 : if( p_struct!=NULL )
15470 : {
15471 0 : alglib_impl::_knnreport_destroy(p_struct);
15472 0 : ae_free(p_struct);
15473 : }
15474 0 : }
15475 :
15476 0 : alglib_impl::knnreport* _knnreport_owner::c_ptr()
15477 : {
15478 0 : return p_struct;
15479 : }
15480 :
15481 0 : alglib_impl::knnreport* _knnreport_owner::c_ptr() const
15482 : {
15483 0 : return const_cast<alglib_impl::knnreport*>(p_struct);
15484 : }
15485 0 : knnreport::knnreport() : _knnreport_owner() ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
15486 : {
15487 0 : }
15488 :
15489 0 : knnreport::knnreport(const knnreport &rhs):_knnreport_owner(rhs) ,relclserror(p_struct->relclserror),avgce(p_struct->avgce),rmserror(p_struct->rmserror),avgerror(p_struct->avgerror),avgrelerror(p_struct->avgrelerror)
15490 : {
15491 0 : }
15492 :
15493 0 : knnreport& knnreport::operator=(const knnreport &rhs)
15494 : {
15495 0 : if( this==&rhs )
15496 0 : return *this;
15497 0 : _knnreport_owner::operator=(rhs);
15498 0 : return *this;
15499 : }
15500 :
15501 0 : knnreport::~knnreport()
15502 : {
15503 0 : }
15504 :
15505 :
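/*************************************************************************
EDITOR'S EXAMPLE (sketch): building a KNN classifier and reading the report
fields listed above. knnbuildercreate(), knnbuildersetdatasetcls() and
knnbuilderbuildknnmodel() are part of this interface (defined elsewhere);
the tiny dataset and K=1 are assumptions.
*************************************************************************/
static void example_knn_build()
{
    alglib::knnbuilder kb;
    alglib::knnmodel km;
    alglib::knnreport krep;
    alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]"; // 2 inputs + class label
    alglib::knnbuildercreate(kb);
    alglib::knnbuildersetdatasetcls(kb, xy, 4, 2, 2);      // 4 points, 2 vars, 2 classes
    alglib::knnbuilderbuildknnmodel(kb, 1, 0.0, km, krep); // K=1, eps=0 (exact kNN)
    printf("misclassification rate: %.3f\n", double(krep.relclserror));
}
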
15506 : /*************************************************************************
15507 : This function serializes a data structure to a string.
15508 :
15509 : Important properties of s_out:
15510 : * it contains alphanumeric characters, dots, underscores, minus signs
15511 : * these symbols are grouped into words, which are separated by spaces
15512 : and Windows-style (CR+LF) newlines
15513 : * although the serializer uses spaces and CR+LF as separators, you can
15514 : replace any separator character by an arbitrary combination of spaces,
15515 : tabs, Windows or Unix newlines. This allows flexible reformatting of
15516 : the string in case you want to include it in a text or XML file.
15517 : But you should not insert separators into the middle of the "words",
15518 : nor should you change the case of letters.
15519 : * s_out can be freely moved between 32-bit and 64-bit systems, little
15520 : and big endian machines, and so on. You can serialize a structure on
15521 : a 32-bit machine and unserialize it on a 64-bit one (or vice versa),
15522 : or serialize it on SPARC and unserialize it on x86. You can also
15523 : serialize it in the C++ version of ALGLIB and unserialize it in the
15524 : C# one, and vice versa.
15525 : *************************************************************************/
15526 0 : void knnserialize(knnmodel &obj, std::string &s_out)
15527 : {
15528 : jmp_buf _break_jump;
15529 : alglib_impl::ae_state state;
15530 : alglib_impl::ae_serializer serializer;
15531 : alglib_impl::ae_int_t ssize;
15532 :
15533 0 : alglib_impl::ae_state_init(&state);
15534 0 : if( setjmp(_break_jump) )
15535 : {
15536 : #if !defined(AE_NO_EXCEPTIONS)
15537 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
15538 : #else
15539 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
15540 : return;
15541 : #endif
15542 : }
15543 0 : ae_state_set_break_jump(&state, &_break_jump);
15544 0 : alglib_impl::ae_serializer_init(&serializer);
15545 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
15546 0 : alglib_impl::knnalloc(&serializer, obj.c_ptr(), &state);
15547 0 : ssize = alglib_impl::ae_serializer_get_alloc_size(&serializer);
15548 0 : s_out.clear();
15549 0 : s_out.reserve((size_t)(ssize+1));
15550 0 : alglib_impl::ae_serializer_sstart_str(&serializer, &s_out);
15551 0 : alglib_impl::knnserialize(&serializer, obj.c_ptr(), &state);
15552 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
15553 0 : alglib_impl::ae_assert( s_out.length()<=(size_t)ssize, "ALGLIB: serialization integrity error", &state);
15554 0 : alglib_impl::ae_serializer_clear(&serializer);
15555 0 : alglib_impl::ae_state_clear(&state);
15556 0 : }
15557 : /*************************************************************************
15558 : This function unserializes data structure from string.
15559 : *************************************************************************/
15560 0 : void knnunserialize(const std::string &s_in, knnmodel &obj)
15561 : {
15562 : jmp_buf _break_jump;
15563 : alglib_impl::ae_state state;
15564 : alglib_impl::ae_serializer serializer;
15565 :
15566 0 : alglib_impl::ae_state_init(&state);
15567 0 : if( setjmp(_break_jump) )
15568 : {
15569 : #if !defined(AE_NO_EXCEPTIONS)
15570 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
15571 : #else
15572 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
15573 : return;
15574 : #endif
15575 : }
15576 0 : ae_state_set_break_jump(&state, &_break_jump);
15577 0 : alglib_impl::ae_serializer_init(&serializer);
15578 0 : alglib_impl::ae_serializer_ustart_str(&serializer, &s_in);
15579 0 : alglib_impl::knnunserialize(&serializer, obj.c_ptr(), &state);
15580 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
15581 0 : alglib_impl::ae_serializer_clear(&serializer);
15582 0 : alglib_impl::ae_state_clear(&state);
15583 0 : }
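/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): a round
trip through the two string-based functions above. It assumes `model` is
an already-built knnmodel (see knnbuilderbuildknnmodel() below) and that
dataanalysis.h and <string> have been included.

    void example_string_roundtrip(alglib::knnmodel &model)
    {
        // Serialize to a portable ASCII string (words of alphanumerics,
        // dots, underscores and minus signs, separated by spaces/CR+LF).
        std::string s;
        alglib::knnserialize(model, s);

        // Restore on any platform: 32/64-bit, little/big endian, C++/C#.
        alglib::knnmodel restored;
        alglib::knnunserialize(s, restored);
    }
*************************************************************************/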
15584 :
15585 :
15586 : /*************************************************************************
15587 : This function serializes data structure to C++ stream.
15588 :
15589 : The data stream generated by this function is the same as the string
15590 : representation generated by the string version of the serializer:
15591 : alphanumeric characters, dots, underscores and minus signs, grouped
15592 : into words separated by spaces and CR+LF.
15593 :
15594 : We recommend reading the comments on the string version of the
15595 : serializer to find out more about serialization of ALGLIB objects.
15596 : *************************************************************************/
15597 0 : void knnserialize(knnmodel &obj, std::ostream &s_out)
15598 : {
15599 : jmp_buf _break_jump;
15600 : alglib_impl::ae_state state;
15601 : alglib_impl::ae_serializer serializer;
15602 :
15603 0 : alglib_impl::ae_state_init(&state);
15604 0 : if( setjmp(_break_jump) )
15605 : {
15606 : #if !defined(AE_NO_EXCEPTIONS)
15607 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
15608 : #else
15609 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
15610 : return;
15611 : #endif
15612 : }
15613 0 : ae_state_set_break_jump(&state, &_break_jump);
15614 0 : alglib_impl::ae_serializer_init(&serializer);
15615 0 : alglib_impl::ae_serializer_alloc_start(&serializer);
15616 0 : alglib_impl::knnalloc(&serializer, obj.c_ptr(), &state);
15617 0 : alglib_impl::ae_serializer_get_alloc_size(&serializer); // not actually needed, but we have to ask
15618 0 : alglib_impl::ae_serializer_sstart_stream(&serializer, &s_out);
15619 0 : alglib_impl::knnserialize(&serializer, obj.c_ptr(), &state);
15620 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
15621 0 : alglib_impl::ae_serializer_clear(&serializer);
15622 0 : alglib_impl::ae_state_clear(&state);
15623 0 : }
15624 : /*************************************************************************
15625 : This function unserializes data structure from stream.
15626 : *************************************************************************/
15627 0 : void knnunserialize(const std::istream &s_in, knnmodel &obj)
15628 : {
15629 : jmp_buf _break_jump;
15630 : alglib_impl::ae_state state;
15631 : alglib_impl::ae_serializer serializer;
15632 :
15633 0 : alglib_impl::ae_state_init(&state);
15634 0 : if( setjmp(_break_jump) )
15635 : {
15636 : #if !defined(AE_NO_EXCEPTIONS)
15637 0 : _ALGLIB_CPP_EXCEPTION(state.error_msg);
15638 : #else
15639 : _ALGLIB_SET_ERROR_FLAG(state.error_msg);
15640 : return;
15641 : #endif
15642 : }
15643 0 : ae_state_set_break_jump(&state, &_break_jump);
15644 0 : alglib_impl::ae_serializer_init(&serializer);
15645 0 : alglib_impl::ae_serializer_ustart_stream(&serializer, &s_in);
15646 0 : alglib_impl::knnunserialize(&serializer, obj.c_ptr(), &state);
15647 0 : alglib_impl::ae_serializer_stop(&serializer, &state);
15648 0 : alglib_impl::ae_serializer_clear(&serializer);
15649 0 : alglib_impl::ae_state_clear(&state);
15650 0 : }
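/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): persisting
a trained model to disk through the stream overloads above. The file name
is hypothetical; <fstream> must be included.

    void example_save_and_load(alglib::knnmodel &model)
    {
        // Write the model using the std::ostream overload...
        std::ofstream out("knn.model");
        alglib::knnserialize(model, out);
        out.close();

        // ...and read it back with the std::istream overload.
        std::ifstream in("knn.model");
        alglib::knnmodel restored;
        alglib::knnunserialize(in, restored);
    }
*************************************************************************/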
15651 :
15652 : /*************************************************************************
15653 : This function creates a buffer structure which can be used to perform
15654 : parallel KNN requests.
15655 :
15656 : The KNN subpackage provides two sets of computing functions: ones which
15657 : use the internal buffer of the KNN model (these functions are
15658 : single-threaded, because they share one buffer which cannot be shared
15659 : between threads), and ones which use an external buffer.
15660 :
15661 : This function is used to initialize such an external buffer.
15662 :
15663 : INPUT PARAMETERS
15664 : Model - KNN model which is associated with newly created buffer
15665 :
15666 : OUTPUT PARAMETERS
15667 : Buf - external buffer.
15668 :
15669 :
15670 : IMPORTANT: the buffer object should be used only with the model that was
15671 : used to initialize it. Any attempt to use the buffer with a
15672 : different model is dangerous - you may get an integrity check
15673 : failure (exception) because the sizes of the internal arrays
15674 : do not fit the dimensions of the model structure.
15675 :
15676 : -- ALGLIB --
15677 : Copyright 15.02.2019 by Bochkanov Sergey
15678 : *************************************************************************/
15679 0 : void knncreatebuffer(const knnmodel &model, knnbuffer &buf, const xparams _xparams)
15680 : {
15681 : jmp_buf _break_jump;
15682 : alglib_impl::ae_state _alglib_env_state;
15683 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15684 0 : if( setjmp(_break_jump) )
15685 : {
15686 : #if !defined(AE_NO_EXCEPTIONS)
15687 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15688 : #else
15689 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15690 : return;
15691 : #endif
15692 : }
15693 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15694 0 : if( _xparams.flags!=0x0 )
15695 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15696 0 : alglib_impl::knncreatebuffer(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnbuffer*>(buf.c_ptr()), &_alglib_env_state);
15697 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15698 0 : return;
15699 : }
15700 :
15701 : /*************************************************************************
15702 : This subroutine creates a KNNBuilder object which is used to train KNN models.
15703 :
15704 : By default, the new builder stores an empty dataset and some reasonable
15705 : default settings. At the very least, you should specify a dataset prior to
15706 : building a KNN model. You can also tweak the settings of the model
15707 : construction algorithm (recommended, although the defaults should work well).
15708 :
15709 : The following actions are mandatory:
15710 : * calling knnbuildersetdatasetreg() or knnbuildersetdatasetcls() to specify the dataset
15711 : * calling knnbuilderbuildknnmodel() to build a KNN model using the current
15712 : dataset and default settings
15713 :
15714 : Additionally, you may call:
15715 : * knnbuildersetnorm() to change the norm being used
15716 :
15717 : INPUT PARAMETERS:
15718 : none
15719 :
15720 : OUTPUT PARAMETERS:
15721 : S - KNN builder
15722 :
15723 : -- ALGLIB --
15724 : Copyright 15.02.2019 by Bochkanov Sergey
15725 : *************************************************************************/
15726 0 : void knnbuildercreate(knnbuilder &s, const xparams _xparams)
15727 : {
15728 : jmp_buf _break_jump;
15729 : alglib_impl::ae_state _alglib_env_state;
15730 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15731 0 : if( setjmp(_break_jump) )
15732 : {
15733 : #if !defined(AE_NO_EXCEPTIONS)
15734 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15735 : #else
15736 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15737 : return;
15738 : #endif
15739 : }
15740 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15741 0 : if( _xparams.flags!=0x0 )
15742 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15743 0 : alglib_impl::knnbuildercreate(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), &_alglib_env_state);
15744 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15745 0 : return;
15746 : }
15747 :
15748 : /*************************************************************************
15749 : Specifies a regression problem (one or more continuous output variables
15750 : are predicted). There also exists a "classification" version of this function.
15751 :
15752 : This subroutine adds a dense dataset to the internal storage of the builder
15753 : object. Specifying your dataset in the dense format means that the dense
15754 : version of the KNN construction algorithm will be invoked.
15755 :
15756 : INPUT PARAMETERS:
15757 : S - KNN builder object
15758 : XY - array[NPoints,NVars+NOut] (note: actual size can be
15759 : larger, only leading part is used anyway), dataset:
15760 : * first NVars elements of each row store values of the
15761 : independent variables
15762 : * next NOut elements store values of the dependent
15763 : variables
15764 : NPoints - number of rows in the dataset, NPoints>=1
15765 : NVars - number of independent variables, NVars>=1
15766 : NOut - number of dependent variables, NOut>=1
15767 :
15768 : OUTPUT PARAMETERS:
15769 : S - KNN builder
15770 :
15771 : -- ALGLIB --
15772 : Copyright 15.02.2019 by Bochkanov Sergey
15773 : *************************************************************************/
15774 0 : void knnbuildersetdatasetreg(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nout, const xparams _xparams)
15775 : {
15776 : jmp_buf _break_jump;
15777 : alglib_impl::ae_state _alglib_env_state;
15778 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15779 0 : if( setjmp(_break_jump) )
15780 : {
15781 : #if !defined(AE_NO_EXCEPTIONS)
15782 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15783 : #else
15784 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15785 : return;
15786 : #endif
15787 : }
15788 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15789 0 : if( _xparams.flags!=0x0 )
15790 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15791 0 : alglib_impl::knnbuildersetdatasetreg(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nout, &_alglib_env_state);
15792 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15793 0 : return;
15794 : }
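/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): the dense
[NVars | NOut] row layout expected by knnbuildersetdatasetreg(). The
builder is assumed to come from knnbuildercreate(); the numbers are made
up for illustration.

    void example_regression_dataset(alglib::knnbuilder &builder)
    {
        // One independent variable (NVars=1), one dependent variable
        // (NOut=1): each row is [x, y].
        alglib::real_2d_array xy("[[0.0,0.1],[1.0,0.9],[2.0,2.1],[3.0,2.9]]");
        alglib::knnbuildersetdatasetreg(builder, xy, 4, 1, 1);
    }
*************************************************************************/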
15795 :
15796 : /*************************************************************************
15797 : Specifies a classification problem (two or more classes are predicted).
15798 : There also exists a "regression" version of this function.
15799 :
15800 : This subroutine adds a dense dataset to the internal storage of the builder
15801 : object. Specifying your dataset in the dense format means that the dense
15802 : version of the KNN construction algorithm will be invoked.
15803 :
15804 : INPUT PARAMETERS:
15805 : S - KNN builder object
15806 : XY - array[NPoints,NVars+1] (note: actual size can be
15807 : larger, only leading part is used anyway), dataset:
15808 : * first NVars elements of each row store values of the
15809 : independent variables
15810 : * next element stores class index, in [0,NClasses)
15811 : NPoints - number of rows in the dataset, NPoints>=1
15812 : NVars - number of independent variables, NVars>=1
15813 : NClasses - number of classes, NClasses>=2
15814 :
15815 : OUTPUT PARAMETERS:
15816 : S - KNN builder
15817 :
15818 : -- ALGLIB --
15819 : Copyright 15.02.2019 by Bochkanov Sergey
15820 : *************************************************************************/
15821 0 : void knnbuildersetdatasetcls(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams)
15822 : {
15823 : jmp_buf _break_jump;
15824 : alglib_impl::ae_state _alglib_env_state;
15825 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15826 0 : if( setjmp(_break_jump) )
15827 : {
15828 : #if !defined(AE_NO_EXCEPTIONS)
15829 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15830 : #else
15831 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15832 : return;
15833 : #endif
15834 : }
15835 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15836 0 : if( _xparams.flags!=0x0 )
15837 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15838 0 : alglib_impl::knnbuildersetdatasetcls(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, nclasses, &_alglib_env_state);
15839 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15840 0 : return;
15841 : }
15842 :
15843 : /*************************************************************************
15844 : This function sets norm type used for neighbor search.
15845 :
15846 : INPUT PARAMETERS:
15847 : S - KNN builder object
15848 : NormType - norm type:
15849 : * 0 inf-norm
15850 : * 1 1-norm
15851 : * 2 Euclidean norm (default)
15852 :
15853 : OUTPUT PARAMETERS:
15854 : S - KNN builder
15855 :
15856 : -- ALGLIB --
15857 : Copyright 15.02.2019 by Bochkanov Sergey
15858 : *************************************************************************/
15859 0 : void knnbuildersetnorm(const knnbuilder &s, const ae_int_t nrmtype, const xparams _xparams)
15860 : {
15861 : jmp_buf _break_jump;
15862 : alglib_impl::ae_state _alglib_env_state;
15863 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15864 0 : if( setjmp(_break_jump) )
15865 : {
15866 : #if !defined(AE_NO_EXCEPTIONS)
15867 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15868 : #else
15869 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15870 : return;
15871 : #endif
15872 : }
15873 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15874 0 : if( _xparams.flags!=0x0 )
15875 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15876 0 : alglib_impl::knnbuildersetnorm(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), nrmtype, &_alglib_env_state);
15877 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15878 0 : return;
15879 : }
15880 :
15881 : /*************************************************************************
15882 : This subroutine builds KNN model according to current settings, using
15883 : dataset internally stored in the builder object.
15884 :
15885 : The model being built performs inference using the Eps-approximate K
15886 : nearest neighbors search algorithm, with:
15887 : * K=1, Eps=0 corresponding to the "nearest neighbor algorithm"
15888 : * K>1, Eps=0 corresponding to the "K nearest neighbors algorithm"
15889 : * K>=1, Eps>0 corresponding to the "approximate nearest neighbors algorithm"
15890 :
15891 : An approximate KNN is a good option for high-dimensional datasets (exact
15892 : KNN becomes slow as the dimension count grows).
15893 :
15894 : An ALGLIB implementation of kd-trees is used to perform k-nn searches.
15895 :
15896 : ! COMMERCIAL EDITION OF ALGLIB:
15897 : !
15898 : ! The Commercial Edition of ALGLIB includes the following important
15899 : ! improvements to this function:
15900 : ! * high-performance native backend with the same C# interface (C# version)
15901 : ! * multithreading support (C++ and C# versions)
15902 : !
15903 : ! We recommend that you read the 'Working with commercial version' section
15904 : ! of the ALGLIB Reference Manual to find out how to use the performance-
15905 : ! related features provided by the commercial edition of ALGLIB.
15906 :
15907 : INPUT PARAMETERS:
15908 : S - KNN builder object
15909 : K - number of neighbors to search for, K>=1
15910 : Eps - approximation factor:
15911 : * Eps=0 means that exact kNN search is performed
15912 : * Eps>0 means that (1+Eps)-approximate search is performed
15913 :
15914 : OUTPUT PARAMETERS:
15915 : Model - KNN model
15916 : Rep - report
15917 :
15918 : -- ALGLIB --
15919 : Copyright 15.02.2019 by Bochkanov Sergey
15920 : *************************************************************************/
15921 0 : void knnbuilderbuildknnmodel(const knnbuilder &s, const ae_int_t k, const double eps, knnmodel &model, knnreport &rep, const xparams _xparams)
15922 : {
15923 : jmp_buf _break_jump;
15924 : alglib_impl::ae_state _alglib_env_state;
15925 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15926 0 : if( setjmp(_break_jump) )
15927 : {
15928 : #if !defined(AE_NO_EXCEPTIONS)
15929 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15930 : #else
15931 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15932 : return;
15933 : #endif
15934 : }
15935 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15936 0 : if( _xparams.flags!=0x0 )
15937 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15938 0 : alglib_impl::knnbuilderbuildknnmodel(const_cast<alglib_impl::knnbuilder*>(s.c_ptr()), k, eps, const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnreport*>(rep.c_ptr()), &_alglib_env_state);
15939 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15940 0 : return;
15941 : }
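/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): the whole
builder pipeline on a made-up 2-D, 2-class dataset. All values are
invented for illustration only.

    #include "dataanalysis.h"
    #include <cstdio>

    int main()
    {
        // Dataset rows are [x0, x1, class], class index in [0,2).
        alglib::real_2d_array xy("[[0.0,0.0,0],[0.1,0.2,0],[5.0,5.0,1],[5.1,4.9,1]]");

        alglib::knnbuilder builder;
        alglib::knnbuildercreate(builder);
        alglib::knnbuildersetdatasetcls(builder, xy, 4, 2, 2); // NPoints, NVars, NClasses
        alglib::knnbuildersetnorm(builder, 2);                 // 2 = Euclidean (default)

        alglib::knnmodel model;
        alglib::knnreport rep;
        alglib::knnbuilderbuildknnmodel(builder, 3, 0.0, model, rep); // K=3, exact search

        alglib::real_1d_array x("[4.9,5.2]");
        std::printf("predicted class: %d\n", (int)alglib::knnclassify(model, x));
        return 0;
    }
*************************************************************************/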
15942 :
15943 : /*************************************************************************
15944 : Changing search settings of KNN model.
15945 :
15946 : The K and EPS parameters of the KNN (AKNN) search are specified during
15947 : model construction. However, the plain KNN algorithm with Euclidean
15948 : distance allows you to change them at any moment.
15949 :
15950 : NOTE: future versions of KNN model may support advanced versions of KNN,
15951 : such as NCA or LMNN. It is possible that such algorithms won't allow
15952 : you to change search settings on the fly. If you call this function
15953 : for an algorithm which does not support on-the-fly changes, it will
15954 : throw an exception.
15955 :
15956 : INPUT PARAMETERS:
15957 : Model - KNN model
15958 : K - K>=1, neighbors count
15959 : EPS - accuracy of the EPS-approximate NN search. Set to 0.0 if
15960 : you want to perform a "classic" KNN search. Specify
15961 : larger values if you need to speed up high-dimensional
15962 : KNN queries.
15963 :
15964 : OUTPUT PARAMETERS:
15965 : nothing on success, exception on failure
15966 :
15967 : -- ALGLIB --
15968 : Copyright 15.02.2019 by Bochkanov Sergey
15969 : *************************************************************************/
15970 0 : void knnrewritekeps(const knnmodel &model, const ae_int_t k, const double eps, const xparams _xparams)
15971 : {
15972 : jmp_buf _break_jump;
15973 : alglib_impl::ae_state _alglib_env_state;
15974 0 : alglib_impl::ae_state_init(&_alglib_env_state);
15975 0 : if( setjmp(_break_jump) )
15976 : {
15977 : #if !defined(AE_NO_EXCEPTIONS)
15978 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
15979 : #else
15980 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
15981 : return;
15982 : #endif
15983 : }
15984 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
15985 0 : if( _xparams.flags!=0x0 )
15986 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
15987 0 : alglib_impl::knnrewritekeps(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), k, eps, &_alglib_env_state);
15988 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
15989 0 : return;
15990 : }
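/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): retuning a
trained model on the fly with the function above. `model` is assumed to
be an already-built knnmodel.

    void example_retune(const alglib::knnmodel &model)
    {
        // Switch from the build-time settings to 5-NN with
        // (1+0.5)-approximate search, without rebuilding the model.
        // Throws an exception if the model kind forbids such changes.
        alglib::knnrewritekeps(model, 5, 0.5);
    }
*************************************************************************/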
15991 :
15992 : /*************************************************************************
15993 : Inference using KNN model.
15994 :
15995 : See also knnprocess0(), knnprocessi() and knnclassify() for variants with
15996 : a somewhat more convenient interface.
15997 :
15998 : IMPORTANT: this function is thread-unsafe and modifies internal structures
15999 : of the model! You cannot use the same model object for parallel
16000 : evaluation from several threads.
16001 :
16002 : Use knntsprocess() with independent thread-local buffers, if
16003 : you need thread-safe evaluation.
16004 :
16005 : INPUT PARAMETERS:
16006 : Model - KNN model
16007 : X - input vector, array[0..NVars-1].
16008 : Y - possible preallocated buffer. Reused if long enough.
16009 :
16010 : OUTPUT PARAMETERS:
16011 : Y - result. Regression estimate when solving regression task,
16012 : vector of posterior probabilities for classification task.
16013 :
16014 : -- ALGLIB --
16015 : Copyright 15.02.2019 by Bochkanov Sergey
16016 : *************************************************************************/
16017 0 : void knnprocess(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
16018 : {
16019 : jmp_buf _break_jump;
16020 : alglib_impl::ae_state _alglib_env_state;
16021 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16022 0 : if( setjmp(_break_jump) )
16023 : {
16024 : #if !defined(AE_NO_EXCEPTIONS)
16025 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16026 : #else
16027 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16028 : return;
16029 : #endif
16030 : }
16031 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16032 0 : if( _xparams.flags!=0x0 )
16033 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16034 0 : alglib_impl::knnprocess(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
16035 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16036 0 : return;
16037 : }
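/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): reading
posterior probabilities for the made-up 2-class model sketched earlier.
Requires <cstdio>.

    void example_posteriors(const alglib::knnmodel &model)
    {
        // For a 2-class model, y receives one posterior probability per
        // class; the entries sum to 1.
        alglib::real_1d_array x("[4.9,5.2]");
        alglib::real_1d_array y;
        alglib::knnprocess(model, x, y);
        std::printf("P(class0)=%.3f  P(class1)=%.3f\n", y[0], y[1]);
    }
*************************************************************************/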
16038 :
16039 : /*************************************************************************
16040 : This function returns the first component of the inferred vector (i.e. the
16041 : one with index #0).
16042 :
16043 : It is a convenience wrapper for knnprocess() intended for either:
16044 : * 1-dimensional regression problems
16045 : * 2-class classification problems
16046 :
16047 : In the former case this function returns the inference result as a scalar,
16048 : which is definitely more convenient than wrapping it as a vector. In the
16049 : latter case it returns the probability of the object belonging to class #0.
16050 :
16051 : If you call it for anything different from the two cases above, it will
16052 : work as defined, i.e. return y[0], although it is of less use in such cases.
16053 :
16054 : IMPORTANT: this function is thread-unsafe and modifies internal structures
16055 : of the model! You cannot use the same model object for parallel
16056 : evaluation from several threads.
16057 :
16058 : Use knntsprocess() with independent thread-local buffers, if
16059 : you need thread-safe evaluation.
16060 :
16061 : INPUT PARAMETERS:
16062 : Model - KNN model
16063 : X - input vector, array[0..NVars-1].
16064 :
16065 : RESULT:
16066 : Y[0]
16067 :
16068 : -- ALGLIB --
16069 : Copyright 15.02.2019 by Bochkanov Sergey
16070 : *************************************************************************/
16071 0 : double knnprocess0(const knnmodel &model, const real_1d_array &x, const xparams _xparams)
16072 : {
16073 : jmp_buf _break_jump;
16074 : alglib_impl::ae_state _alglib_env_state;
16075 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16076 0 : if( setjmp(_break_jump) )
16077 : {
16078 : #if !defined(AE_NO_EXCEPTIONS)
16079 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16080 : #else
16081 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16082 : return 0;
16083 : #endif
16084 : }
16085 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16086 0 : if( _xparams.flags!=0x0 )
16087 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16088 0 : double result = alglib_impl::knnprocess0(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
16089 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16090 0 : return *(reinterpret_cast<double*>(&result));
16091 : }
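/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): the
scalar-result convenience wrapper above, applied to a hypothetical
2-class model.

    double example_p0(const alglib::knnmodel &model,
                      const alglib::real_1d_array &x)
    {
        // For a 2-class model this returns P(class #0) directly,
        // avoiding the output vector needed by knnprocess().
        return alglib::knnprocess0(model, x);
    }
*************************************************************************/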
16092 :
16093 : /*************************************************************************
16094 : This function returns the most probable class number for an input X. It is
16095 : the same as calling knnprocess(model,x,y), then determining i=argmax(y[i])
16096 : and returning i.
16097 :
16098 : A class number in the [0,NOut) range is returned for classification
16099 : problems; -1 is returned when this function is called for regression problems.
16100 :
16101 : IMPORTANT: this function is thread-unsafe and modifies internal structures
16102 : of the model! You cannot use the same model object for parallel
16103 : evaluation from several threads.
16104 :
16105 : Use knntsprocess() with independent thread-local buffers, if
16106 : you need thread-safe evaluation.
16107 :
16108 : INPUT PARAMETERS:
16109 : Model - KNN model
16110 : X - input vector, array[0..NVars-1].
16111 :
16112 : RESULT:
16113 : class number, -1 for regression tasks
16114 :
16115 : -- ALGLIB --
16116 : Copyright 15.02.2019 by Bochkanov Sergey
16117 : *************************************************************************/
16118 0 : ae_int_t knnclassify(const knnmodel &model, const real_1d_array &x, const xparams _xparams)
16119 : {
16120 : jmp_buf _break_jump;
16121 : alglib_impl::ae_state _alglib_env_state;
16122 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16123 0 : if( setjmp(_break_jump) )
16124 : {
16125 : #if !defined(AE_NO_EXCEPTIONS)
16126 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16127 : #else
16128 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16129 : return 0;
16130 : #endif
16131 : }
16132 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16133 0 : if( _xparams.flags!=0x0 )
16134 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16135 0 : alglib_impl::ae_int_t result = alglib_impl::knnclassify(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), &_alglib_env_state);
16136 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16137 0 : return *(reinterpret_cast<ae_int_t*>(&result));
16138 : }
16139 :
16140 : /*************************************************************************
16141 : 'interactive' variant of knnprocess() for languages like Python which
16142 : support constructs like "y = knnprocessi(model,x)" and interactive mode of
16143 : the interpreter.
16144 :
16145 : This function allocates a new array on each call, so it is significantly
16146 : slower than its 'non-interactive' counterpart, but it is more convenient
16147 : when you call it from the command line.
16148 :
16149 : IMPORTANT: this function is thread-unsafe and may modify internal
16150 : structures of the model! You cannot use the same model object for
16151 : parallel evaluation from several threads.
16152 :
16153 : Use knntsprocess() with independent thread-local buffers if
16154 : you need thread-safe evaluation.
16155 :
16156 : -- ALGLIB --
16157 : Copyright 15.02.2019 by Bochkanov Sergey
16158 : *************************************************************************/
16159 0 : void knnprocessi(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
16160 : {
16161 : jmp_buf _break_jump;
16162 : alglib_impl::ae_state _alglib_env_state;
16163 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16164 0 : if( setjmp(_break_jump) )
16165 : {
16166 : #if !defined(AE_NO_EXCEPTIONS)
16167 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16168 : #else
16169 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16170 : return;
16171 : #endif
16172 : }
16173 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16174 0 : if( _xparams.flags!=0x0 )
16175 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16176 0 : alglib_impl::knnprocessi(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
16177 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16178 0 : return;
16179 : }
16180 :
16181 : /*************************************************************************
16182 : Thread-safe processing using an external buffer for temporaries.
16183 :
16184 : This function is thread-safe (i.e. you can use the same KNN model from
16185 : multiple threads) as long as you use different buffer objects for
16186 : different threads.
16187 :
16188 : INPUT PARAMETERS:
16189 : Model - KNN model
16190 : Buf - buffer object, must be allocated specifically for this
16191 : model with knncreatebuffer().
16192 : X - input vector, array[NVars]
16193 :
16194 : OUTPUT PARAMETERS:
16195 : Y - result, array[NOut]. Regression estimate when solving
16196 : regression task, vector of posterior probabilities for
16197 : a classification task.
16198 :
16199 : -- ALGLIB --
16200 : Copyright 15.02.2019 by Bochkanov Sergey
16201 : *************************************************************************/
16202 0 : void knntsprocess(const knnmodel &model, const knnbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams)
16203 : {
16204 : jmp_buf _break_jump;
16205 : alglib_impl::ae_state _alglib_env_state;
16206 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16207 0 : if( setjmp(_break_jump) )
16208 : {
16209 : #if !defined(AE_NO_EXCEPTIONS)
16210 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16211 : #else
16212 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16213 : return;
16214 : #endif
16215 : }
16216 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16217 0 : if( _xparams.flags!=0x0 )
16218 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16219 0 : alglib_impl::knntsprocess(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::knnbuffer*>(buf.c_ptr()), const_cast<alglib_impl::ae_vector*>(x.c_ptr()), const_cast<alglib_impl::ae_vector*>(y.c_ptr()), &_alglib_env_state);
16220 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16221 0 : return;
16222 : }
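/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): the same
model queried from two threads, each owning a private buffer created with
knncreatebuffer(). Requires <thread> and <functional>; the query points
are made up.

    void example_parallel(const alglib::knnmodel &model)
    {
        // One buffer per thread; the model itself is shared read-only.
        alglib::knnbuffer buf0, buf1;
        alglib::knncreatebuffer(model, buf0);
        alglib::knncreatebuffer(model, buf1);

        auto worker = [&model](alglib::knnbuffer &buf, const char *pt)
        {
            alglib::real_1d_array x(pt), y;
            alglib::knntsprocess(model, buf, x, y); // safe: buf is thread-local
        };
        std::thread t0(worker, std::ref(buf0), "[0.1,0.1]");
        std::thread t1(worker, std::ref(buf1), "[5.0,5.0]");
        t0.join();
        t1.join();
    }
*************************************************************************/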
16223 :
16224 : /*************************************************************************
16225 : Relative classification error on the test set
16226 :
16227 : INPUT PARAMETERS:
16228 : Model - KNN model
16229 : XY - test set
16230 : NPoints - test set size
16231 :
16232 : RESULT:
16233 : percent of incorrectly classified cases.
16234 : Zero if model solves regression task.
16235 :
16236 : NOTE: if you need several different kinds of error metrics, it is better
16237 : to use knnallerrors(), which computes all error metrics with just
16238 : one pass over the dataset.
16239 :
16240 : -- ALGLIB --
16241 : Copyright 15.02.2019 by Bochkanov Sergey
16242 : *************************************************************************/
16243 0 : double knnrelclserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
16244 : {
16245 : jmp_buf _break_jump;
16246 : alglib_impl::ae_state _alglib_env_state;
16247 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16248 0 : if( setjmp(_break_jump) )
16249 : {
16250 : #if !defined(AE_NO_EXCEPTIONS)
16251 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16252 : #else
16253 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16254 : return 0;
16255 : #endif
16256 : }
16257 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16258 0 : if( _xparams.flags!=0x0 )
16259 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16260 0 : double result = alglib_impl::knnrelclserror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
16261 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16262 0 : return *(reinterpret_cast<double*>(&result));
16263 : }
16264 :
16265 : /*************************************************************************
16266 : Average cross-entropy (in bits per element) on the test set
16267 :
16268 : INPUT PARAMETERS:
16269 : Model - KNN model
16270 : XY - test set
16271 : NPoints - test set size
16272 :
16273 : RESULT:
16274 : CrossEntropy/NPoints.
16275 : Zero if model solves regression task.
16276 :
16277 : NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
16278 : models (such models can report exactly zero probabilities), so we
16279 : do not recommend using it.
16280 :
16281 : NOTE: if you need several different kinds of error metrics, it is better
16282 : to use knnallerrors(), which computes all error metrics with just
16283 : one pass over the dataset.
16284 :
16285 : -- ALGLIB --
16286 : Copyright 15.02.2019 by Bochkanov Sergey
16287 : *************************************************************************/
16288 0 : double knnavgce(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
16289 : {
16290 : jmp_buf _break_jump;
16291 : alglib_impl::ae_state _alglib_env_state;
16292 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16293 0 : if( setjmp(_break_jump) )
16294 : {
16295 : #if !defined(AE_NO_EXCEPTIONS)
16296 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16297 : #else
16298 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16299 : return 0;
16300 : #endif
16301 : }
16302 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16303 0 : if( _xparams.flags!=0x0 )
16304 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16305 0 : double result = alglib_impl::knnavgce(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
16306 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16307 0 : return *(reinterpret_cast<double*>(&result));
16308 : }
16309 :
16310 : /*************************************************************************
16311 : RMS error on the test set.
16312 :
16313 : Its meaning for regression tasks is obvious. As for classification problems,
16314 : the RMS error means the error made when estimating posterior probabilities.
16315 :
16316 : INPUT PARAMETERS:
16317 : Model - KNN model
16318 : XY - test set
16319 : NPoints - test set size
16320 :
16321 : RESULT:
16322 : root mean square error.
16323 :
16324 : NOTE: if you need several different kinds of error metrics, it is better
16325 : to use knnallerrors(), which computes all error metrics with just
16326 : one pass over the dataset.
16327 :
16328 : -- ALGLIB --
16329 : Copyright 15.02.2019 by Bochkanov Sergey
16330 : *************************************************************************/
16331 0 : double knnrmserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
16332 : {
16333 : jmp_buf _break_jump;
16334 : alglib_impl::ae_state _alglib_env_state;
16335 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16336 0 : if( setjmp(_break_jump) )
16337 : {
16338 : #if !defined(AE_NO_EXCEPTIONS)
16339 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16340 : #else
16341 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16342 : return 0;
16343 : #endif
16344 : }
16345 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16346 0 : if( _xparams.flags!=0x0 )
16347 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16348 0 : double result = alglib_impl::knnrmserror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
16349 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16350 0 : return *(reinterpret_cast<double*>(&result));
16351 : }
16352 :
16353 : /*************************************************************************
16354 : Average error on the test set
16355 :
16356 : Its meaning for regression tasks is obvious. As for classification problems,
16357 : the average error means the error made when estimating posterior probabilities.
16358 :
16359 : INPUT PARAMETERS:
16360 : Model - KNN model
16361 : XY - test set
16362 : NPoints - test set size
16363 :
16364 : RESULT:
16365 : average error
16366 :
16367 : NOTE: if you need several different kinds of error metrics, it is better
16368 : to use knnallerrors(), which computes all error metrics with just
16369 : one pass over the dataset.
16370 :
16371 : -- ALGLIB --
16372 : Copyright 15.02.2019 by Bochkanov Sergey
16373 : *************************************************************************/
16374 0 : double knnavgerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
16375 : {
16376 : jmp_buf _break_jump;
16377 : alglib_impl::ae_state _alglib_env_state;
16378 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16379 0 : if( setjmp(_break_jump) )
16380 : {
16381 : #if !defined(AE_NO_EXCEPTIONS)
16382 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16383 : #else
16384 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16385 : return 0;
16386 : #endif
16387 : }
16388 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16389 0 : if( _xparams.flags!=0x0 )
16390 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16391 0 : double result = alglib_impl::knnavgerror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
16392 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16393 0 : return *(reinterpret_cast<double*>(&result));
16394 : }
16395 :
16396 : /*************************************************************************
16397 : Average relative error on the test set
16398 :
16399 : Its meaning for regression tasks is obvious. As for classification problems,
16400 : the average relative error means the error when estimating posterior probabilities.
16401 :
16402 : INPUT PARAMETERS:
16403 : Model - KNN model
16404 : XY - test set
16405 : NPoints - test set size
16406 :
16407 : RESULT:
16408 : average relative error
16409 :
16410 : NOTE: if you need several different kinds of error metrics, it is better
16411 : to use knnallerrors(), which computes all error metrics with just
16412 : one pass over the dataset.
16413 :
16414 : -- ALGLIB --
16415 : Copyright 15.02.2019 by Bochkanov Sergey
16416 : *************************************************************************/
16417 0 : double knnavgrelerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams)
16418 : {
16419 : jmp_buf _break_jump;
16420 : alglib_impl::ae_state _alglib_env_state;
16421 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16422 0 : if( setjmp(_break_jump) )
16423 : {
16424 : #if !defined(AE_NO_EXCEPTIONS)
16425 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16426 : #else
16427 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16428 : return 0;
16429 : #endif
16430 : }
16431 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16432 0 : if( _xparams.flags!=0x0 )
16433 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16434 0 : double result = alglib_impl::knnavgrelerror(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, &_alglib_env_state);
16435 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16436 0 : return *(reinterpret_cast<double*>(&result));
16437 : }
16438 :
16439 : /*************************************************************************
16440 : Calculates all kinds of errors for the model in one call.
16441 :
16442 : INPUT PARAMETERS:
16443 : Model - KNN model
16444 : XY - test set:
16445 : * one row per point
16446 : * first NVars columns store independent variables
16447 : * depending on problem type:
16448 : * next column stores class number in [0,NClasses) - for
16449 : classification problems
16450 : * next NOut columns store dependent variables - for
16451 : regression problems
16452 : NPoints - test set size, NPoints>=0
16453 :
16454 : OUTPUT PARAMETERS:
16455 : Rep - following fields are loaded with errors for both regression
16456 : and classification models:
16457 : * rep.rmserror - RMS error for the output
16458 : * rep.avgerror - average error
16459 : * rep.avgrelerror - average relative error
16460 : following fields are set only for classification models,
16461 : zero for regression ones:
16462 : * relclserror - relative classification error, in [0,1]
16463 : * avgce - average cross-entropy in bits per dataset entry
16464 :
16465 : NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
16466 : models (such models can report exactly zero probabilities), so we
16467 : do not recommend using it.
16468 :
16469 : -- ALGLIB --
16470 : Copyright 15.02.2019 by Bochkanov Sergey
16471 : *************************************************************************/
16472 0 : void knnallerrors(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, knnreport &rep, const xparams _xparams)
16473 : {
16474 : jmp_buf _break_jump;
16475 : alglib_impl::ae_state _alglib_env_state;
16476 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16477 0 : if( setjmp(_break_jump) )
16478 : {
16479 : #if !defined(AE_NO_EXCEPTIONS)
16480 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16481 : #else
16482 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16483 : return;
16484 : #endif
16485 : }
16486 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16487 0 : if( _xparams.flags!=0x0 )
16488 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16489 0 : alglib_impl::knnallerrors(const_cast<alglib_impl::knnmodel*>(model.c_ptr()), const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, const_cast<alglib_impl::knnreport*>(rep.c_ptr()), &_alglib_env_state);
16490 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16491 0 : return;
16492 : }
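/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): computing
every error metric in one pass with the function above. `xytest`/`ntest`
are a hypothetical test set in the same layout as the training data;
requires <cstdio>.

    void example_errors(const alglib::knnmodel &model,
                        const alglib::real_2d_array &xytest,
                        alglib::ae_int_t ntest)
    {
        // One pass over the test set fills every field of the report;
        // the classification-only fields are zero for regression models.
        alglib::knnreport rep;
        alglib::knnallerrors(model, xytest, ntest, rep);
        std::printf("rms=%.4f  avg=%.4f  relcls=%.4f\n",
                    rep.rmserror, rep.avgerror, rep.relclserror);
    }
*************************************************************************/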
16493 : #endif
16494 :
16495 : #if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
16496 : /*************************************************************************
16497 : k-means++ clustering.
16498 : This is a backward compatibility function; we recommend using the CLUSTERING
16499 : subpackage as a better replacement.
16500 :
16501 : -- ALGLIB --
16502 : Copyright 21.03.2009 by Bochkanov Sergey
16503 : *************************************************************************/
16504 0 : void kmeansgenerate(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t k, const ae_int_t restarts, ae_int_t &info, real_2d_array &c, integer_1d_array &xyc, const xparams _xparams)
16505 : {
16506 : jmp_buf _break_jump;
16507 : alglib_impl::ae_state _alglib_env_state;
16508 0 : alglib_impl::ae_state_init(&_alglib_env_state);
16509 0 : if( setjmp(_break_jump) )
16510 : {
16511 : #if !defined(AE_NO_EXCEPTIONS)
16512 0 : _ALGLIB_CPP_EXCEPTION(_alglib_env_state.error_msg);
16513 : #else
16514 : _ALGLIB_SET_ERROR_FLAG(_alglib_env_state.error_msg);
16515 : return;
16516 : #endif
16517 : }
16518 0 : ae_state_set_break_jump(&_alglib_env_state, &_break_jump);
16519 0 : if( _xparams.flags!=0x0 )
16520 0 : ae_state_set_flags(&_alglib_env_state, _xparams.flags);
16521 0 : alglib_impl::kmeansgenerate(const_cast<alglib_impl::ae_matrix*>(xy.c_ptr()), npoints, nvars, k, restarts, &info, const_cast<alglib_impl::ae_matrix*>(c.c_ptr()), const_cast<alglib_impl::ae_vector*>(xyc.c_ptr()), &_alglib_env_state);
16522 0 : alglib_impl::ae_state_clear(&_alglib_env_state);
16523 0 : return;
16524 : }
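/*************************************************************************
EXAMPLE (illustrative sketch, not part of the ALGLIB sources): the legacy
call above on four made-up 2-D points, clustered into K=2 groups with 5
random restarts.

    void example_kmeans_legacy()
    {
        alglib::real_2d_array xy("[[0,0],[0,1],[10,10],[10,11]]");
        alglib::ae_int_t info;
        alglib::real_2d_array c;      // out: centers, array[K,NVars]
        alglib::integer_1d_array xyc; // out: cluster index of each point
        alglib::kmeansgenerate(xy, 4, 2, 2, 5, info, c, xyc);
    }
*************************************************************************/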
16525 : #endif
16526 : }
16527 :
16528 : /////////////////////////////////////////////////////////////////////////
16529 : //
16530 : // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
16531 : //
16532 : /////////////////////////////////////////////////////////////////////////
16533 : namespace alglib_impl
16534 : {
16535 : #if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
16536 :
16537 :
16538 : #endif
16539 : #if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
16540 : static double bdss_xlny(double x, double y, ae_state *_state);
16541 : static double bdss_getcv(/* Integer */ ae_vector* cnt,
16542 : ae_int_t nc,
16543 : ae_state *_state);
16544 : static void bdss_tieaddc(/* Integer */ ae_vector* c,
16545 : /* Integer */ ae_vector* ties,
16546 : ae_int_t ntie,
16547 : ae_int_t nc,
16548 : /* Integer */ ae_vector* cnt,
16549 : ae_state *_state);
16550 : static void bdss_tiesubc(/* Integer */ ae_vector* c,
16551 : /* Integer */ ae_vector* ties,
16552 : ae_int_t ntie,
16553 : ae_int_t nc,
16554 : /* Integer */ ae_vector* cnt,
16555 : ae_state *_state);
16556 :
16557 :
16558 : #endif
16559 : #if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
16560 : static ae_int_t mlpbase_mlpvnum = 7;
16561 : static ae_int_t mlpbase_mlpfirstversion = 0;
16562 : static ae_int_t mlpbase_nfieldwidth = 4;
16563 : static ae_int_t mlpbase_hlconnfieldwidth = 5;
16564 : static ae_int_t mlpbase_hlnfieldwidth = 4;
16565 : static ae_int_t mlpbase_gradbasecasecost = 50000;
16566 : static ae_int_t mlpbase_microbatchsize = 64;
16567 : static void mlpbase_addinputlayer(ae_int_t ncount,
16568 : /* Integer */ ae_vector* lsizes,
16569 : /* Integer */ ae_vector* ltypes,
16570 : /* Integer */ ae_vector* lconnfirst,
16571 : /* Integer */ ae_vector* lconnlast,
16572 : ae_int_t* lastproc,
16573 : ae_state *_state);
16574 : static void mlpbase_addbiasedsummatorlayer(ae_int_t ncount,
16575 : /* Integer */ ae_vector* lsizes,
16576 : /* Integer */ ae_vector* ltypes,
16577 : /* Integer */ ae_vector* lconnfirst,
16578 : /* Integer */ ae_vector* lconnlast,
16579 : ae_int_t* lastproc,
16580 : ae_state *_state);
16581 : static void mlpbase_addactivationlayer(ae_int_t functype,
16582 : /* Integer */ ae_vector* lsizes,
16583 : /* Integer */ ae_vector* ltypes,
16584 : /* Integer */ ae_vector* lconnfirst,
16585 : /* Integer */ ae_vector* lconnlast,
16586 : ae_int_t* lastproc,
16587 : ae_state *_state);
16588 : static void mlpbase_addzerolayer(/* Integer */ ae_vector* lsizes,
16589 : /* Integer */ ae_vector* ltypes,
16590 : /* Integer */ ae_vector* lconnfirst,
16591 : /* Integer */ ae_vector* lconnlast,
16592 : ae_int_t* lastproc,
16593 : ae_state *_state);
16594 : static void mlpbase_hladdinputlayer(multilayerperceptron* network,
16595 : ae_int_t* connidx,
16596 : ae_int_t* neuroidx,
16597 : ae_int_t* structinfoidx,
16598 : ae_int_t nin,
16599 : ae_state *_state);
16600 : static void mlpbase_hladdoutputlayer(multilayerperceptron* network,
16601 : ae_int_t* connidx,
16602 : ae_int_t* neuroidx,
16603 : ae_int_t* structinfoidx,
16604 : ae_int_t* weightsidx,
16605 : ae_int_t k,
16606 : ae_int_t nprev,
16607 : ae_int_t nout,
16608 : ae_bool iscls,
16609 : ae_bool islinearout,
16610 : ae_state *_state);
16611 : static void mlpbase_hladdhiddenlayer(multilayerperceptron* network,
16612 : ae_int_t* connidx,
16613 : ae_int_t* neuroidx,
16614 : ae_int_t* structinfoidx,
16615 : ae_int_t* weightsidx,
16616 : ae_int_t k,
16617 : ae_int_t nprev,
16618 : ae_int_t ncur,
16619 : ae_state *_state);
16620 : static void mlpbase_fillhighlevelinformation(multilayerperceptron* network,
16621 : ae_int_t nin,
16622 : ae_int_t nhid1,
16623 : ae_int_t nhid2,
16624 : ae_int_t nout,
16625 : ae_bool iscls,
16626 : ae_bool islinearout,
16627 : ae_state *_state);
16628 : static void mlpbase_mlpcreate(ae_int_t nin,
16629 : ae_int_t nout,
16630 : /* Integer */ ae_vector* lsizes,
16631 : /* Integer */ ae_vector* ltypes,
16632 : /* Integer */ ae_vector* lconnfirst,
16633 : /* Integer */ ae_vector* lconnlast,
16634 : ae_int_t layerscount,
16635 : ae_bool isclsnet,
16636 : multilayerperceptron* network,
16637 : ae_state *_state);
16638 : static void mlpbase_mlphessianbatchinternal(multilayerperceptron* network,
16639 : /* Real */ ae_matrix* xy,
16640 : ae_int_t ssize,
16641 : ae_bool naturalerr,
16642 : double* e,
16643 : /* Real */ ae_vector* grad,
16644 : /* Real */ ae_matrix* h,
16645 : ae_state *_state);
16646 : static void mlpbase_mlpinternalcalculategradient(multilayerperceptron* network,
16647 : /* Real */ ae_vector* neurons,
16648 : /* Real */ ae_vector* weights,
16649 : /* Real */ ae_vector* derror,
16650 : /* Real */ ae_vector* grad,
16651 : ae_bool naturalerrorfunc,
16652 : ae_state *_state);
16653 : static void mlpbase_mlpchunkedgradient(multilayerperceptron* network,
16654 : /* Real */ ae_matrix* xy,
16655 : ae_int_t cstart,
16656 : ae_int_t csize,
16657 : /* Real */ ae_vector* batch4buf,
16658 : /* Real */ ae_vector* hpcbuf,
16659 : double* e,
16660 : ae_bool naturalerrorfunc,
16661 : ae_state *_state);
16662 : static void mlpbase_mlpchunkedprocess(multilayerperceptron* network,
16663 : /* Real */ ae_matrix* xy,
16664 : ae_int_t cstart,
16665 : ae_int_t csize,
16666 : /* Real */ ae_vector* batch4buf,
16667 : /* Real */ ae_vector* hpcbuf,
16668 : ae_state *_state);
16669 : static double mlpbase_safecrossentropy(double t,
16670 : double z,
16671 : ae_state *_state);
16672 : static void mlpbase_randomizebackwardpass(multilayerperceptron* network,
16673 : ae_int_t neuronidx,
16674 : double v,
16675 : ae_state *_state);
16676 :
16677 :
16678 : #endif
16679 : #if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
16680 :
16681 :
16682 : #endif
16683 : #if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
16684 : static ae_bool ssa_hassomethingtoanalyze(ssamodel* s, ae_state *_state);
16685 : static ae_bool ssa_issequencebigenough(ssamodel* s,
16686 : ae_int_t i,
16687 : ae_state *_state);
16688 : static void ssa_updatebasis(ssamodel* s,
16689 : ae_int_t appendlen,
16690 : double updateits,
16691 : ae_state *_state);
16692 : static void ssa_analyzesequence(ssamodel* s,
16693 : /* Real */ ae_vector* data,
16694 : ae_int_t i0,
16695 : ae_int_t i1,
16696 : /* Real */ ae_vector* trend,
16697 : /* Real */ ae_vector* noise,
16698 : ae_int_t offs,
16699 : ae_state *_state);
16700 : static void ssa_forecastavgsequence(ssamodel* s,
16701 : /* Real */ ae_vector* data,
16702 : ae_int_t i0,
16703 : ae_int_t i1,
16704 : ae_int_t m,
16705 : ae_int_t forecastlen,
16706 : ae_bool smooth,
16707 : /* Real */ ae_vector* trend,
16708 : ae_int_t offs,
16709 : ae_state *_state);
16710 : static void ssa_realtimedequeue(ssamodel* s,
16711 : double beta,
16712 : ae_int_t cnt,
16713 : ae_state *_state);
16714 : static void ssa_updatexxtprepare(ssamodel* s,
16715 : ae_int_t updatesize,
16716 : ae_int_t windowwidth,
16717 : ae_int_t memorylimit,
16718 : ae_state *_state);
16719 : static void ssa_updatexxtsend(ssamodel* s,
16720 : /* Real */ ae_vector* u,
16721 : ae_int_t i0,
16722 : /* Real */ ae_matrix* xxt,
16723 : ae_state *_state);
16724 : static void ssa_updatexxtfinalize(ssamodel* s,
16725 : /* Real */ ae_matrix* xxt,
16726 : ae_state *_state);
16727 :
16728 :
16729 : #endif
16730 : #if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
16731 : static ae_int_t linreg_lrvnum = 5;
16732 : static void linreg_lrinternal(/* Real */ ae_matrix* xy,
16733 : /* Real */ ae_vector* s,
16734 : ae_int_t npoints,
16735 : ae_int_t nvars,
16736 : ae_int_t* info,
16737 : linearmodel* lm,
16738 : lrreport* ar,
16739 : ae_state *_state);
16740 :
16741 :
16742 : #endif
16743 : #if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
16744 :
16745 :
16746 : #endif
16747 : #if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
16748 : static double logit_xtol = 100*ae_machineepsilon;
16749 : static double logit_ftol = 0.0001;
16750 : static double logit_gtol = 0.3;
16751 : static ae_int_t logit_maxfev = 20;
16752 : static double logit_stpmin = 1.0E-2;
16753 : static double logit_stpmax = 1.0E5;
16754 : static ae_int_t logit_logitvnum = 6;
16755 : static void logit_mnliexp(/* Real */ ae_vector* w,
16756 : /* Real */ ae_vector* x,
16757 : ae_state *_state);
16758 : static void logit_mnlallerrors(logitmodel* lm,
16759 : /* Real */ ae_matrix* xy,
16760 : ae_int_t npoints,
16761 : double* relcls,
16762 : double* avgce,
16763 : double* rms,
16764 : double* avg,
16765 : double* avgrel,
16766 : ae_state *_state);
16767 : static void logit_mnlmcsrch(ae_int_t n,
16768 : /* Real */ ae_vector* x,
16769 : double* f,
16770 : /* Real */ ae_vector* g,
16771 : /* Real */ ae_vector* s,
16772 : double* stp,
16773 : ae_int_t* info,
16774 : ae_int_t* nfev,
16775 : /* Real */ ae_vector* wa,
16776 : logitmcstate* state,
16777 : ae_int_t* stage,
16778 : ae_state *_state);
16779 : static void logit_mnlmcstep(double* stx,
16780 : double* fx,
16781 : double* dx,
16782 : double* sty,
16783 : double* fy,
16784 : double* dy,
16785 : double* stp,
16786 : double fp,
16787 : double dp,
16788 : ae_bool* brackt,
16789 : double stmin,
16790 : double stmax,
16791 : ae_int_t* info,
16792 : ae_state *_state);
16793 :
16794 :
16795 : #endif
16796 : #if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
16797 : static double mcpd_xtol = 1.0E-8;
16798 : static void mcpd_mcpdinit(ae_int_t n,
16799 : ae_int_t entrystate,
16800 : ae_int_t exitstate,
16801 : mcpdstate* s,
16802 : ae_state *_state);
16803 :
16804 :
16805 : #endif
16806 : #if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
16807 : static ae_int_t mlpe_mlpefirstversion = 1;
16808 :
16809 :
16810 : #endif
16811 : #if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
16812 : static double mlptrain_mindecay = 0.001;
16813 : static ae_int_t mlptrain_defaultlbfgsfactor = 6;
16814 : static void mlptrain_mlpkfoldcvgeneral(multilayerperceptron* n,
16815 : /* Real */ ae_matrix* xy,
16816 : ae_int_t npoints,
16817 : double decay,
16818 : ae_int_t restarts,
16819 : ae_int_t foldscount,
16820 : ae_bool lmalgorithm,
16821 : double wstep,
16822 : ae_int_t maxits,
16823 : ae_int_t* info,
16824 : mlpreport* rep,
16825 : mlpcvreport* cvrep,
16826 : ae_state *_state);
16827 : static void mlptrain_mlpkfoldsplit(/* Real */ ae_matrix* xy,
16828 : ae_int_t npoints,
16829 : ae_int_t nclasses,
16830 : ae_int_t foldscount,
16831 : ae_bool stratifiedsplits,
16832 : /* Integer */ ae_vector* folds,
16833 : ae_state *_state);
16834 : static void mlptrain_mthreadcv(mlptrainer* s,
16835 : ae_int_t rowsize,
16836 : ae_int_t nrestarts,
16837 : /* Integer */ ae_vector* folds,
16838 : ae_int_t fold,
16839 : ae_int_t dfold,
16840 : /* Real */ ae_matrix* cvy,
16841 : ae_shared_pool* pooldatacv,
16842 : ae_int_t wcount,
16843 : ae_state *_state);
16844 : ae_bool _trypexec_mlptrain_mthreadcv(mlptrainer* s,
16845 : ae_int_t rowsize,
16846 : ae_int_t nrestarts,
16847 : /* Integer */ ae_vector* folds,
16848 : ae_int_t fold,
16849 : ae_int_t dfold,
16850 : /* Real */ ae_matrix* cvy,
16851 : ae_shared_pool* pooldatacv,
16852 : ae_int_t wcount, ae_state *_state);
16853 : static void mlptrain_mlptrainnetworkx(mlptrainer* s,
16854 : ae_int_t nrestarts,
16855 : ae_int_t algokind,
16856 : /* Integer */ ae_vector* trnsubset,
16857 : ae_int_t trnsubsetsize,
16858 : /* Integer */ ae_vector* valsubset,
16859 : ae_int_t valsubsetsize,
16860 : multilayerperceptron* network,
16861 : mlpreport* rep,
16862 : ae_bool isrootcall,
16863 : ae_shared_pool* sessions,
16864 : ae_state *_state);
16865 : ae_bool _trypexec_mlptrain_mlptrainnetworkx(mlptrainer* s,
16866 : ae_int_t nrestarts,
16867 : ae_int_t algokind,
16868 : /* Integer */ ae_vector* trnsubset,
16869 : ae_int_t trnsubsetsize,
16870 : /* Integer */ ae_vector* valsubset,
16871 : ae_int_t valsubsetsize,
16872 : multilayerperceptron* network,
16873 : mlpreport* rep,
16874 : ae_bool isrootcall,
16875 : ae_shared_pool* sessions, ae_state *_state);
16876 : static void mlptrain_mlptrainensemblex(mlptrainer* s,
16877 : mlpensemble* ensemble,
16878 : ae_int_t idx0,
16879 : ae_int_t idx1,
16880 : ae_int_t nrestarts,
16881 : ae_int_t trainingmethod,
16882 : sinteger* ngrad,
16883 : ae_bool isrootcall,
16884 : ae_shared_pool* esessions,
16885 : ae_state *_state);
16886 : ae_bool _trypexec_mlptrain_mlptrainensemblex(mlptrainer* s,
16887 : mlpensemble* ensemble,
16888 : ae_int_t idx0,
16889 : ae_int_t idx1,
16890 : ae_int_t nrestarts,
16891 : ae_int_t trainingmethod,
16892 : sinteger* ngrad,
16893 : ae_bool isrootcall,
16894 : ae_shared_pool* esessions, ae_state *_state);
16895 : static void mlptrain_mlpstarttrainingx(mlptrainer* s,
16896 : ae_bool randomstart,
16897 : ae_int_t algokind,
16898 : /* Integer */ ae_vector* subset,
16899 : ae_int_t subsetsize,
16900 : smlptrnsession* session,
16901 : ae_state *_state);
16902 : static ae_bool mlptrain_mlpcontinuetrainingx(mlptrainer* s,
16903 : /* Integer */ ae_vector* subset,
16904 : ae_int_t subsetsize,
16905 : ae_int_t* ngradbatch,
16906 : smlptrnsession* session,
16907 : ae_state *_state);
16908 : static void mlptrain_mlpebagginginternal(mlpensemble* ensemble,
16909 : /* Real */ ae_matrix* xy,
16910 : ae_int_t npoints,
16911 : double decay,
16912 : ae_int_t restarts,
16913 : double wstep,
16914 : ae_int_t maxits,
16915 : ae_bool lmalgorithm,
16916 : ae_int_t* info,
16917 : mlpreport* rep,
16918 : mlpcvreport* ooberrors,
16919 : ae_state *_state);
16920 : static void mlptrain_initmlptrnsession(multilayerperceptron* networktrained,
16921 : ae_bool randomizenetwork,
16922 : mlptrainer* trainer,
16923 : smlptrnsession* session,
16924 : ae_state *_state);
16925 : static void mlptrain_initmlptrnsessions(multilayerperceptron* networktrained,
16926 : ae_bool randomizenetwork,
16927 : mlptrainer* trainer,
16928 : ae_shared_pool* sessions,
16929 : ae_state *_state);
16930 : static void mlptrain_initmlpetrnsession(multilayerperceptron* individualnetwork,
16931 : mlptrainer* trainer,
16932 : mlpetrnsession* session,
16933 : ae_state *_state);
16934 : static void mlptrain_initmlpetrnsessions(multilayerperceptron* individualnetwork,
16935 : mlptrainer* trainer,
16936 : ae_shared_pool* sessions,
16937 : ae_state *_state);
16938 :
16939 :
16940 : #endif
16941 : #if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
16942 : static ae_int_t clustering_kmeansblocksize = 32;
16943 : static ae_int_t clustering_kmeansparalleldim = 8;
16944 : static ae_int_t clustering_kmeansparallelk = 4;
16945 : static double clustering_complexitymultiplier = 1.0;
16946 : static void clustering_selectinitialcenters(/* Real */ ae_matrix* xy,
16947 : ae_int_t npoints,
16948 : ae_int_t nvars,
16949 : ae_int_t initalgo,
16950 : hqrndstate* rs,
16951 : ae_int_t k,
16952 : /* Real */ ae_matrix* ct,
16953 : apbuffers* initbuf,
16954 : ae_shared_pool* updatepool,
16955 : ae_state *_state);
16956 : static ae_bool clustering_fixcenters(/* Real */ ae_matrix* xy,
16957 : ae_int_t npoints,
16958 : ae_int_t nvars,
16959 : /* Real */ ae_matrix* ct,
16960 : ae_int_t k,
16961 : apbuffers* initbuf,
16962 : ae_shared_pool* updatepool,
16963 : ae_state *_state);
16964 : static void clustering_clusterizerrunahcinternal(clusterizerstate* s,
16965 : /* Real */ ae_matrix* d,
16966 : ahcreport* rep,
16967 : ae_state *_state);
16968 : static void clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
16969 : ae_int_t nfeatures,
16970 : ae_int_t disttype,
16971 : /* Real */ ae_matrix* d,
16972 : ae_int_t i0,
16973 : ae_int_t i1,
16974 : ae_int_t j0,
16975 : ae_int_t j1,
16976 : ae_state *_state);
16977 : ae_bool _trypexec_clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
16978 : ae_int_t nfeatures,
16979 : ae_int_t disttype,
16980 : /* Real */ ae_matrix* d,
16981 : ae_int_t i0,
16982 : ae_int_t i1,
16983 : ae_int_t j0,
16984 : ae_int_t j1, ae_state *_state);
16985 :
16986 :
16987 : #endif
16988 : #if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
16989 : static ae_int_t dforest_innernodewidth = 3;
16990 : static ae_int_t dforest_leafnodewidth = 2;
16991 : static ae_int_t dforest_dfusestrongsplits = 1;
16992 : static ae_int_t dforest_dfuseevs = 2;
16993 : static ae_int_t dforest_dfuncompressedv0 = 0;
16994 : static ae_int_t dforest_dfcompressedv0 = 1;
16995 : static ae_int_t dforest_needtrngini = 1;
16996 : static ae_int_t dforest_needoobgini = 2;
16997 : static ae_int_t dforest_needpermutation = 3;
16998 : static ae_int_t dforest_permutationimportancebatchsize = 512;
16999 : static void dforest_buildrandomtree(decisionforestbuilder* s,
17000 : ae_int_t treeidx0,
17001 : ae_int_t treeidx1,
17002 : ae_state *_state);
17003 : ae_bool _trypexec_dforest_buildrandomtree(decisionforestbuilder* s,
17004 : ae_int_t treeidx0,
17005 : ae_int_t treeidx1, ae_state *_state);
17006 : static void dforest_buildrandomtreerec(decisionforestbuilder* s,
17007 : dfworkbuf* workbuf,
17008 : ae_int_t workingset,
17009 : ae_int_t varstoselect,
17010 : /* Real */ ae_vector* treebuf,
17011 : dfvotebuf* votebuf,
17012 : hqrndstate* rs,
17013 : ae_int_t idx0,
17014 : ae_int_t idx1,
17015 : ae_int_t oobidx0,
17016 : ae_int_t oobidx1,
17017 : double meanloss,
17018 : double topmostmeanloss,
17019 : ae_int_t* treesize,
17020 : ae_state *_state);
17021 : static void dforest_estimatevariableimportance(decisionforestbuilder* s,
17022 : ae_int_t sessionseed,
17023 : decisionforest* df,
17024 : ae_int_t ntrees,
17025 : dfreport* rep,
17026 : ae_state *_state);
17027 : ae_bool _trypexec_dforest_estimatevariableimportance(decisionforestbuilder* s,
17028 : ae_int_t sessionseed,
17029 : decisionforest* df,
17030 : ae_int_t ntrees,
17031 : dfreport* rep, ae_state *_state);
17032 : static void dforest_estimatepermutationimportances(decisionforestbuilder* s,
17033 : decisionforest* df,
17034 : ae_int_t ntrees,
17035 : ae_shared_pool* permpool,
17036 : ae_int_t idx0,
17037 : ae_int_t idx1,
17038 : ae_state *_state);
17039 : ae_bool _trypexec_dforest_estimatepermutationimportances(decisionforestbuilder* s,
17040 : decisionforest* df,
17041 : ae_int_t ntrees,
17042 : ae_shared_pool* permpool,
17043 : ae_int_t idx0,
17044 : ae_int_t idx1, ae_state *_state);
17045 : static void dforest_cleanreport(decisionforestbuilder* s,
17046 : dfreport* rep,
17047 : ae_state *_state);
17048 : static double dforest_meannrms2(ae_int_t nclasses,
17049 : /* Integer */ ae_vector* trnlabelsi,
17050 : /* Real */ ae_vector* trnlabelsr,
17051 : ae_int_t trnidx0,
17052 : ae_int_t trnidx1,
17053 : /* Integer */ ae_vector* tstlabelsi,
17054 : /* Real */ ae_vector* tstlabelsr,
17055 : ae_int_t tstidx0,
17056 : ae_int_t tstidx1,
17057 : /* Integer */ ae_vector* tmpi,
17058 : ae_state *_state);
17059 : static void dforest_choosecurrentsplitdense(decisionforestbuilder* s,
17060 : dfworkbuf* workbuf,
17061 : ae_int_t* varsinpool,
17062 : ae_int_t varstoselect,
17063 : hqrndstate* rs,
17064 : ae_int_t idx0,
17065 : ae_int_t idx1,
17066 : ae_int_t* varbest,
17067 : double* splitbest,
17068 : ae_state *_state);
17069 : static void dforest_evaluatedensesplit(decisionforestbuilder* s,
17070 : dfworkbuf* workbuf,
17071 : hqrndstate* rs,
17072 : ae_int_t splitvar,
17073 : ae_int_t idx0,
17074 : ae_int_t idx1,
17075 : ae_int_t* info,
17076 : double* split,
17077 : double* rms,
17078 : ae_state *_state);
17079 : static void dforest_classifiersplit(decisionforestbuilder* s,
17080 : dfworkbuf* workbuf,
17081 : /* Real */ ae_vector* x,
17082 : /* Integer */ ae_vector* c,
17083 : ae_int_t n,
17084 : hqrndstate* rs,
17085 : ae_int_t* info,
17086 : double* threshold,
17087 : double* e,
17088 : /* Real */ ae_vector* sortrbuf,
17089 : /* Integer */ ae_vector* sortibuf,
17090 : ae_state *_state);
17091 : static void dforest_regressionsplit(decisionforestbuilder* s,
17092 : dfworkbuf* workbuf,
17093 : /* Real */ ae_vector* x,
17094 : /* Real */ ae_vector* y,
17095 : ae_int_t n,
17096 : ae_int_t* info,
17097 : double* threshold,
17098 : double* e,
17099 : /* Real */ ae_vector* sortrbuf,
17100 : /* Real */ ae_vector* sortrbuf2,
17101 : ae_state *_state);
17102 : static double dforest_getsplit(decisionforestbuilder* s,
17103 : double a,
17104 : double b,
17105 : hqrndstate* rs,
17106 : ae_state *_state);
17107 : static void dforest_outputleaf(decisionforestbuilder* s,
17108 : dfworkbuf* workbuf,
17109 : /* Real */ ae_vector* treebuf,
17110 : dfvotebuf* votebuf,
17111 : ae_int_t idx0,
17112 : ae_int_t idx1,
17113 : ae_int_t oobidx0,
17114 : ae_int_t oobidx1,
17115 : ae_int_t* treesize,
17116 : double leafval,
17117 : ae_state *_state);
17118 : static void dforest_analyzeandpreprocessdataset(decisionforestbuilder* s,
17119 : ae_state *_state);
17120 : static void dforest_mergetrees(decisionforestbuilder* s,
17121 : decisionforest* df,
17122 : ae_state *_state);
17123 : static void dforest_processvotingresults(decisionforestbuilder* s,
17124 : ae_int_t ntrees,
17125 : dfvotebuf* buf,
17126 : dfreport* rep,
17127 : ae_state *_state);
17128 : static double dforest_binarycompression(decisionforest* df,
17129 : ae_bool usemantissa8,
17130 : ae_state *_state);
17131 : static ae_int_t dforest_computecompressedsizerec(decisionforest* df,
17132 : ae_bool usemantissa8,
17133 : ae_int_t treeroot,
17134 : ae_int_t treepos,
17135 : /* Integer */ ae_vector* compressedsizes,
17136 : ae_bool savecompressedsizes,
17137 : ae_state *_state);
17138 : static void dforest_compressrec(decisionforest* df,
17139 : ae_bool usemantissa8,
17140 : ae_int_t treeroot,
17141 : ae_int_t treepos,
17142 : /* Integer */ ae_vector* compressedsizes,
17143 : ae_vector* buf,
17144 : ae_int_t* dstoffs,
17145 : ae_state *_state);
17146 : static ae_int_t dforest_computecompresseduintsize(ae_int_t v,
17147 : ae_state *_state);
17148 : static void dforest_streamuint(ae_vector* buf,
17149 : ae_int_t* offs,
17150 : ae_int_t v,
17151 : ae_state *_state);
17152 : static ae_int_t dforest_unstreamuint(ae_vector* buf,
17153 : ae_int_t* offs,
17154 : ae_state *_state);
17155 : static void dforest_streamfloat(ae_vector* buf,
17156 : ae_bool usemantissa8,
17157 : ae_int_t* offs,
17158 : double v,
17159 : ae_state *_state);
17160 : static double dforest_unstreamfloat(ae_vector* buf,
17161 : ae_bool usemantissa8,
17162 : ae_int_t* offs,
17163 : ae_state *_state);
17164 : static ae_int_t dforest_dfclserror(decisionforest* df,
17165 : /* Real */ ae_matrix* xy,
17166 : ae_int_t npoints,
17167 : ae_state *_state);
17168 : static void dforest_dfprocessinternaluncompressed(decisionforest* df,
17169 : ae_int_t subtreeroot,
17170 : ae_int_t nodeoffs,
17171 : /* Real */ ae_vector* x,
17172 : /* Real */ ae_vector* y,
17173 : ae_state *_state);
17174 : static void dforest_dfprocessinternalcompressed(decisionforest* df,
17175 : ae_int_t offs,
17176 : /* Real */ ae_vector* x,
17177 : /* Real */ ae_vector* y,
17178 : ae_state *_state);
17179 : static double dforest_xfastpow(double r, ae_int_t n, ae_state *_state);
17180 :
17181 :
17182 : #endif
17183 : #if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
17184 : static ae_int_t knn_knnfirstversion = 0;
17185 : static void knn_clearreport(knnreport* rep, ae_state *_state);
17186 : static void knn_processinternal(knnmodel* model,
17187 : knnbuffer* buf,
17188 : ae_state *_state);
17189 :
17190 :
17191 : #endif
17192 : #if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
17193 :
17194 :
17195 : #endif
17196 :
17197 : #if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
17198 :
17199 :
17200 : /*************************************************************************
17201 : Principal components analysis
17202 :
17203 : This function builds orthogonal basis where first axis corresponds to
17204 : direction with maximum variance, second axis maximizes variance in the
17205 : subspace orthogonal to first axis and so on.
17206 :
17207 : This function builds a FULL basis, i.e. returns N vectors corresponding to
17208 : ALL directions, no matter how informative. If you need just a few (say,
17209 : 10 or 50) of the most important directions, you may find it faster to use
17210 : one of the reduced versions:
17211 : * pcatruncatedsubspace() - for subspace iteration based method
17212 :
17213 : It should be noted that, unlike LDA, PCA does not use class labels.
17214 :
17215 : ! COMMERCIAL EDITION OF ALGLIB:
17216 : !
17217 : ! Commercial Edition of ALGLIB includes the following important improvements
17218 : ! of this function:
17219 : ! * high-performance native backend with same C# interface (C# version)
17220 : ! * multithreading support (C++ and C# versions)
17221 : ! * hardware vendor (Intel) implementations of linear algebra primitives
17222 : ! (C++ and C# versions, x86/x64 platform)
17223 : !
17224 : ! We recommend that you read the 'Working with commercial version' section
17225 : ! of the ALGLIB Reference Manual to find out how to use the performance-
17226 : ! related features provided by the commercial edition of ALGLIB.
17227 :
17228 : INPUT PARAMETERS:
17229 : X - dataset, array[0..NPoints-1,0..NVars-1].
17230 : matrix contains ONLY INDEPENDENT VARIABLES.
17231 : NPoints - dataset size, NPoints>=0
17232 : NVars - number of independent variables, NVars>=1
17233 :
17234 : OUTPUT PARAMETERS:
17235 : Info - return code:
17236 :                     * -4, if the SVD subroutine hasn't converged
17237 :                     * -1, if wrong parameters have been passed (NPoints<0,
17238 :                           NVars<1)
17239 : * 1, if task is solved
17240 : S2 - array[0..NVars-1]. variance values corresponding
17241 : to basis vectors.
17242 : V - array[0..NVars-1,0..NVars-1]
17243 : matrix, whose columns store basis vectors.
17244 :
17245 : -- ALGLIB --
17246 : Copyright 25.08.2008 by Bochkanov Sergey
17247 : *************************************************************************/
17248 0 : void pcabuildbasis(/* Real */ ae_matrix* x,
17249 : ae_int_t npoints,
17250 : ae_int_t nvars,
17251 : ae_int_t* info,
17252 : /* Real */ ae_vector* s2,
17253 : /* Real */ ae_matrix* v,
17254 : ae_state *_state)
17255 : {
17256 : ae_frame _frame_block;
17257 : ae_matrix a;
17258 : ae_matrix u;
17259 : ae_matrix vt;
17260 : ae_vector m;
17261 : ae_vector t;
17262 : ae_int_t i;
17263 : ae_int_t j;
17264 : double mean;
17265 : double variance;
17266 : double skewness;
17267 : double kurtosis;
17268 :
17269 0 : ae_frame_make(_state, &_frame_block);
17270 0 : memset(&a, 0, sizeof(a));
17271 0 : memset(&u, 0, sizeof(u));
17272 0 : memset(&vt, 0, sizeof(vt));
17273 0 : memset(&m, 0, sizeof(m));
17274 0 : memset(&t, 0, sizeof(t));
17275 0 : *info = 0;
17276 0 : ae_vector_clear(s2);
17277 0 : ae_matrix_clear(v);
17278 0 : ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
17279 0 : ae_matrix_init(&u, 0, 0, DT_REAL, _state, ae_true);
17280 0 : ae_matrix_init(&vt, 0, 0, DT_REAL, _state, ae_true);
17281 0 : ae_vector_init(&m, 0, DT_REAL, _state, ae_true);
17282 0 : ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
17283 :
17284 :
17285 : /*
17286 : * Check input data
17287 : */
17288 0 : if( npoints<0||nvars<1 )
17289 : {
17290 0 : *info = -1;
17291 0 : ae_frame_leave(_state);
17292 0 : return;
17293 : }
17294 0 : *info = 1;
17295 :
17296 : /*
17297 : * Special case: NPoints=0
17298 : */
17299 0 : if( npoints==0 )
17300 : {
17301 0 : ae_vector_set_length(s2, nvars, _state);
17302 0 : ae_matrix_set_length(v, nvars, nvars, _state);
17303 0 : for(i=0; i<=nvars-1; i++)
17304 : {
17305 0 : s2->ptr.p_double[i] = (double)(0);
17306 : }
17307 0 : for(i=0; i<=nvars-1; i++)
17308 : {
17309 0 : for(j=0; j<=nvars-1; j++)
17310 : {
17311 0 : if( i==j )
17312 : {
17313 0 : v->ptr.pp_double[i][j] = (double)(1);
17314 : }
17315 : else
17316 : {
17317 0 : v->ptr.pp_double[i][j] = (double)(0);
17318 : }
17319 : }
17320 : }
17321 0 : ae_frame_leave(_state);
17322 0 : return;
17323 : }
17324 :
17325 : /*
17326 : * Calculate means
17327 : */
17328 0 : ae_vector_set_length(&m, nvars, _state);
17329 0 : ae_vector_set_length(&t, npoints, _state);
17330 0 : for(j=0; j<=nvars-1; j++)
17331 : {
17332 0 : ae_v_move(&t.ptr.p_double[0], 1, &x->ptr.pp_double[0][j], x->stride, ae_v_len(0,npoints-1));
17333 0 : samplemoments(&t, npoints, &mean, &variance, &skewness, &kurtosis, _state);
17334 0 : m.ptr.p_double[j] = mean;
17335 : }
17336 :
17337 : /*
17338 : * Center, apply SVD, prepare output
17339 : */
17340 0 : ae_matrix_set_length(&a, ae_maxint(npoints, nvars, _state), nvars, _state);
17341 0 : for(i=0; i<=npoints-1; i++)
17342 : {
17343 0 : ae_v_move(&a.ptr.pp_double[i][0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
17344 0 : ae_v_sub(&a.ptr.pp_double[i][0], 1, &m.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
17345 : }
17346 0 : for(i=npoints; i<=nvars-1; i++)
17347 : {
17348 0 : for(j=0; j<=nvars-1; j++)
17349 : {
17350 0 : a.ptr.pp_double[i][j] = (double)(0);
17351 : }
17352 : }
17353 0 : if( !rmatrixsvd(&a, ae_maxint(npoints, nvars, _state), nvars, 0, 1, 2, s2, &u, &vt, _state) )
17354 : {
17355 0 : *info = -4;
17356 0 : ae_frame_leave(_state);
17357 0 : return;
17358 : }
17359 0 : if( npoints!=1 )
17360 : {
17361 0 : for(i=0; i<=nvars-1; i++)
17362 : {
17363 0 : s2->ptr.p_double[i] = ae_sqr(s2->ptr.p_double[i], _state)/(npoints-1);
17364 : }
17365 : }
17366 0 : ae_matrix_set_length(v, nvars, nvars, _state);
17367 0 : copyandtranspose(&vt, 0, nvars-1, 0, nvars-1, v, 0, nvars-1, 0, nvars-1, _state);
17368 0 : ae_frame_leave(_state);
17369 : }
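
/*************************************************************************
Illustrative usage sketch for pcabuildbasis(), added for exposition; it
is NOT part of the library. It assumes only the C-core entry points used
elsewhere in this file (ae_state_init/ae_state_clear, ae_matrix_init,
ae_vector_init and the corresponding *_clear destructors). Error handling
via ae_state_set_break_jump() is omitted, so a failed assertion aborts.
*************************************************************************/
static void example_pcabuildbasis(void)
{
    ae_state st;
    ae_matrix x;
    ae_vector s2;
    ae_matrix v;
    ae_int_t info;

    ae_state_init(&st);

    /* 4 points in 2 variables, spread mostly along the first coordinate */
    ae_matrix_init(&x, 4, 2, DT_REAL, &st, ae_false);
    x.ptr.pp_double[0][0] = -2.0; x.ptr.pp_double[0][1] = -0.1;
    x.ptr.pp_double[1][0] = -1.0; x.ptr.pp_double[1][1] =  0.1;
    x.ptr.pp_double[2][0] =  1.0; x.ptr.pp_double[2][1] = -0.1;
    x.ptr.pp_double[3][0] =  2.0; x.ptr.pp_double[3][1] =  0.1;
    ae_vector_init(&s2, 0, DT_REAL, &st, ae_false);
    ae_matrix_init(&v, 0, 0, DT_REAL, &st, ae_false);

    /* full basis: on success info==1, s2[0]>=s2[1] hold the per-axis
       variances, and columns of v store the basis vectors (here the
       first one is close to (1,0), up to sign) */
    pcabuildbasis(&x, 4, 2, &info, &s2, &v, &st);

    ae_matrix_clear(&v);
    ae_vector_clear(&s2);
    ae_matrix_clear(&x);
    ae_state_clear(&st);
}
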
17370 :
17371 :
17372 : /*************************************************************************
17373 : Principal components analysis
17374 :
17375 : This function performs truncated PCA, i.e. returns just a few most important
17376 : directions.
17377 :
17378 : Internally it uses an iterative eigensolver which is very efficient when
17379 : only a minor fraction of the full basis is required. Thus, if you need the
17380 : full basis, it is better to use the pcabuildbasis() function.
17381 :
17382 : It should be noted that, unlike LDA, PCA does not use class labels.
17383 :
17384 : ! COMMERCIAL EDITION OF ALGLIB:
17385 : !
17386 : ! Commercial Edition of ALGLIB includes the following important improvements
17387 : ! of this function:
17388 : ! * high-performance native backend with same C# interface (C# version)
17389 : ! * multithreading support (C++ and C# versions)
17390 : ! * hardware vendor (Intel) implementations of linear algebra primitives
17391 : ! (C++ and C# versions, x86/x64 platform)
17392 : !
17393 : ! We recommend that you read the 'Working with commercial version' section
17394 : ! of the ALGLIB Reference Manual to find out how to use the performance-
17395 : ! related features provided by the commercial edition of ALGLIB.
17396 :
17397 : INPUT PARAMETERS:
17398 : X - dataset, array[0..NPoints-1,0..NVars-1].
17399 : matrix contains ONLY INDEPENDENT VARIABLES.
17400 : NPoints - dataset size, NPoints>=0
17401 : NVars - number of independent variables, NVars>=1
17402 : NNeeded - number of requested components, in [1,NVars] range;
17403 : this function is efficient only for NNeeded<<NVars.
17404 :     Eps         -   desired precision of vectors returned; the underlying
17405 :                     solver will stop iterating as soon as the absolute error
17406 :                     in the corresponding singular values reduces to roughly
17407 :                     eps*MAX(lambda[]), with lambda[] being the array of
17408 :                     eigenvalues.
17409 :                     A zero value means that the algorithm performs the number
17410 :                     of iterations specified by the maxits parameter, without
17411 :                     paying attention to precision.
17412 : MaxIts - number of iterations performed by subspace iteration
17413 : method. Zero value means that no limit on iteration
17414 : count is placed (eps-based stopping condition is used).
17415 :
17416 :
17417 : OUTPUT PARAMETERS:
17418 : S2 - array[NNeeded]. Variance values corresponding
17419 : to basis vectors.
17420 : V - array[NVars,NNeeded]
17421 : matrix, whose columns store basis vectors.
17422 :
17423 : NOTE: passing eps=0 and maxits=0 results in a small eps being selected as
17424 : the stopping condition. The exact value of the automatically selected eps
17425 : is version-dependent.
17426 :
17427 : -- ALGLIB --
17428 : Copyright 10.01.2017 by Bochkanov Sergey
17429 : *************************************************************************/
17430 0 : void pcatruncatedsubspace(/* Real */ ae_matrix* x,
17431 : ae_int_t npoints,
17432 : ae_int_t nvars,
17433 : ae_int_t nneeded,
17434 : double eps,
17435 : ae_int_t maxits,
17436 : /* Real */ ae_vector* s2,
17437 : /* Real */ ae_matrix* v,
17438 : ae_state *_state)
17439 : {
17440 : ae_frame _frame_block;
17441 : ae_matrix a;
17442 : ae_matrix b;
17443 : ae_vector means;
17444 : ae_int_t i;
17445 : ae_int_t j;
17446 : ae_int_t k;
17447 : double vv;
17448 : eigsubspacestate solver;
17449 : eigsubspacereport rep;
17450 :
17451 0 : ae_frame_make(_state, &_frame_block);
17452 0 : memset(&a, 0, sizeof(a));
17453 0 : memset(&b, 0, sizeof(b));
17454 0 : memset(&means, 0, sizeof(means));
17455 0 : memset(&solver, 0, sizeof(solver));
17456 0 : memset(&rep, 0, sizeof(rep));
17457 0 : ae_vector_clear(s2);
17458 0 : ae_matrix_clear(v);
17459 0 : ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
17460 0 : ae_matrix_init(&b, 0, 0, DT_REAL, _state, ae_true);
17461 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
17462 0 : _eigsubspacestate_init(&solver, _state, ae_true);
17463 0 : _eigsubspacereport_init(&rep, _state, ae_true);
17464 :
17465 0 : ae_assert(npoints>=0, "PCATruncatedSubspace: npoints<0", _state);
17466 0 : ae_assert(nvars>=1, "PCATruncatedSubspace: nvars<1", _state);
17467 0 : ae_assert(nneeded>0, "PCATruncatedSubspace: nneeded<1", _state);
17468 0 : ae_assert(nneeded<=nvars, "PCATruncatedSubspace: nneeded>nvars", _state);
17469 0 : ae_assert(maxits>=0, "PCATruncatedSubspace: maxits<0", _state);
17470 0 : ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "PCATruncatedSubspace: eps<0 or is not finite", _state);
17471 0 : ae_assert(x->rows>=npoints, "PCATruncatedSubspace: rows(x)<npoints", _state);
17472 0 : ae_assert(x->cols>=nvars||npoints==0, "PCATruncatedSubspace: cols(x)<nvars", _state);
17473 :
17474 : /*
17475 : * Special case: NPoints=0
17476 : */
17477 0 : if( npoints==0 )
17478 : {
17479 0 : ae_vector_set_length(s2, nneeded, _state);
17480 0 : ae_matrix_set_length(v, nvars, nneeded, _state);
17481 0 : for(i=0; i<=nvars-1; i++)
17482 : {
17483 0 : s2->ptr.p_double[i] = (double)(0);
17484 : }
17485 0 : for(i=0; i<=nvars-1; i++)
17486 : {
17487 0 : for(j=0; j<=nneeded-1; j++)
17488 : {
17489 0 : if( i==j )
17490 : {
17491 0 : v->ptr.pp_double[i][j] = (double)(1);
17492 : }
17493 : else
17494 : {
17495 0 : v->ptr.pp_double[i][j] = (double)(0);
17496 : }
17497 : }
17498 : }
17499 0 : ae_frame_leave(_state);
17500 0 : return;
17501 : }
17502 :
17503 : /*
17504 : * Center matrix
17505 : */
17506 0 : ae_vector_set_length(&means, nvars, _state);
17507 0 : for(i=0; i<=nvars-1; i++)
17508 : {
17509 0 : means.ptr.p_double[i] = (double)(0);
17510 : }
17511 0 : vv = (double)1/(double)npoints;
17512 0 : for(i=0; i<=npoints-1; i++)
17513 : {
17514 0 : ae_v_addd(&means.ptr.p_double[0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), vv);
17515 : }
17516 0 : ae_matrix_set_length(&a, npoints, nvars, _state);
17517 0 : for(i=0; i<=npoints-1; i++)
17518 : {
17519 0 : ae_v_move(&a.ptr.pp_double[i][0], 1, &x->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
17520 0 : ae_v_sub(&a.ptr.pp_double[i][0], 1, &means.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
17521 : }
17522 :
17523 : /*
17524 : * Find eigenvalues with subspace iteration solver
17525 : */
17526 0 : eigsubspacecreate(nvars, nneeded, &solver, _state);
17527 0 : eigsubspacesetcond(&solver, eps, maxits, _state);
17528 0 : eigsubspaceoocstart(&solver, 0, _state);
17529 0 : while(eigsubspaceooccontinue(&solver, _state))
17530 : {
17531 0 : ae_assert(solver.requesttype==0, "PCATruncatedSubspace: integrity check failed", _state);
17532 0 : k = solver.requestsize;
17533 0 : rmatrixsetlengthatleast(&b, npoints, k, _state);
17534 0 : rmatrixgemm(npoints, k, nvars, 1.0, &a, 0, 0, 0, &solver.x, 0, 0, 0, 0.0, &b, 0, 0, _state);
17535 0 : rmatrixgemm(nvars, k, npoints, 1.0, &a, 0, 0, 1, &b, 0, 0, 0, 0.0, &solver.ax, 0, 0, _state);
17536 : }
17537 0 : eigsubspaceoocstop(&solver, s2, v, &rep, _state);
17538 0 : if( npoints!=1 )
17539 : {
17540 0 : for(i=0; i<=nneeded-1; i++)
17541 : {
17542 0 : s2->ptr.p_double[i] = s2->ptr.p_double[i]/(npoints-1);
17543 : }
17544 : }
17545 0 : ae_frame_leave(_state);
17546 : }
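
/*************************************************************************
Illustrative usage sketch for pcatruncatedsubspace(), added for
exposition; it is NOT part of the library. The caller is assumed to
supply an already initialized ae_state. It requests a single dominant
direction out of three variables and relies on the automatic eps
selection triggered by eps=0, maxits=0 (see the NOTE above).
*************************************************************************/
static void example_pcatruncatedsubspace(ae_state *st)
{
    ae_matrix x;
    ae_vector s2;
    ae_matrix v;
    ae_int_t i;

    /* 5 points in 3 variables; variation is concentrated in x[][0] */
    ae_matrix_init(&x, 5, 3, DT_REAL, st, ae_false);
    for(i=0; i<=4; i++)
    {
        x.ptr.pp_double[i][0] = (double)(i)-2.0;  /* dominant axis */
        x.ptr.pp_double[i][1] = 0.01*(i%2);       /* small noise   */
        x.ptr.pp_double[i][2] = 0.0;
    }
    ae_vector_init(&s2, 0, DT_REAL, st, ae_false);
    ae_matrix_init(&v, 0, 0, DT_REAL, st, ae_false);

    /* nneeded=1: only the most important direction is computed;
       s2[0] is its variance, v[][0] is close to (1,0,0) up to sign */
    pcatruncatedsubspace(&x, 5, 3, 1, 0.0, 0, &s2, &v, st);

    ae_matrix_clear(&v);
    ae_vector_clear(&s2);
    ae_matrix_clear(&x);
}
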
17547 :
17548 :
17549 : /*************************************************************************
17550 : Sparse truncated principal components analysis
17551 :
17552 : This function performs sparse truncated PCA, i.e. returns just a few most
17553 : important principal components for a sparse input X.
17554 :
17555 : Internally it uses an iterative eigensolver which is very efficient when
17556 : only a minor fraction of the full basis is required.
17557 :
17558 : It should be noted that, unlike LDA, PCA does not use class labels.
17559 :
17560 : ! COMMERCIAL EDITION OF ALGLIB:
17561 : !
17562 : ! Commercial Edition of ALGLIB includes the following important improvements
17563 : ! of this function:
17564 : ! * high-performance native backend with same C# interface (C# version)
17565 : ! * multithreading support (C++ and C# versions)
17566 : ! * hardware vendor (Intel) implementations of linear algebra primitives
17567 : ! (C++ and C# versions, x86/x64 platform)
17568 : !
17569 : ! We recommend that you read the 'Working with commercial version' section
17570 : ! of the ALGLIB Reference Manual to find out how to use the performance-
17571 : ! related features provided by the commercial edition of ALGLIB.
17572 :
17573 : INPUT PARAMETERS:
17574 : X - sparse dataset, sparse npoints*nvars matrix. It is
17575 : recommended to use CRS sparse storage format; non-CRS
17576 : input will be internally converted to CRS.
17577 : Matrix contains ONLY INDEPENDENT VARIABLES, and must
17578 : be EXACTLY npoints*nvars.
17579 : NPoints - dataset size, NPoints>=0
17580 : NVars - number of independent variables, NVars>=1
17581 : NNeeded - number of requested components, in [1,NVars] range;
17582 : this function is efficient only for NNeeded<<NVars.
17583 :     Eps         -   desired precision of vectors returned; the underlying
17584 :                     solver will stop iterating as soon as the absolute error
17585 :                     in the corresponding singular values reduces to roughly
17586 :                     eps*MAX(lambda[]), with lambda[] being the array of
17587 :                     eigenvalues.
17588 :                     A zero value means that the algorithm performs the number
17589 :                     of iterations specified by the maxits parameter, without
17590 :                     paying attention to precision.
17591 : MaxIts - number of iterations performed by subspace iteration
17592 : method. Zero value means that no limit on iteration
17593 : count is placed (eps-based stopping condition is used).
17594 :
17595 :
17596 : OUTPUT PARAMETERS:
17597 : S2 - array[NNeeded]. Variance values corresponding
17598 : to basis vectors.
17599 : V - array[NVars,NNeeded]
17600 : matrix, whose columns store basis vectors.
17601 :
17602 : NOTE: passing eps=0 and maxits=0 results in a small eps being selected as
17603 :       the stopping condition. The exact value of the automatically selected
17604 :       eps is version-dependent.
17605 :
17606 : NOTE: a zero MaxIts is silently replaced by a reasonable value which
17607 :       prevents infinite loops (possible when inputs are degenerate and
17608 :       overly stringent stopping criteria are specified). In the current
17609 :       version it is 50+2*NVars.
17610 :
17611 : -- ALGLIB --
17612 : Copyright 10.01.2017 by Bochkanov Sergey
17613 : *************************************************************************/
17614 0 : void pcatruncatedsubspacesparse(sparsematrix* x,
17615 : ae_int_t npoints,
17616 : ae_int_t nvars,
17617 : ae_int_t nneeded,
17618 : double eps,
17619 : ae_int_t maxits,
17620 : /* Real */ ae_vector* s2,
17621 : /* Real */ ae_matrix* v,
17622 : ae_state *_state)
17623 : {
17624 : ae_frame _frame_block;
17625 : sparsematrix xcrs;
17626 : ae_vector b1;
17627 : ae_vector c1;
17628 : ae_vector z1;
17629 : ae_int_t i;
17630 : ae_int_t j;
17631 : ae_int_t k;
17632 : double vv;
17633 : ae_vector means;
17634 : eigsubspacestate solver;
17635 : eigsubspacereport rep;
17636 :
17637 0 : ae_frame_make(_state, &_frame_block);
17638 0 : memset(&xcrs, 0, sizeof(xcrs));
17639 0 : memset(&b1, 0, sizeof(b1));
17640 0 : memset(&c1, 0, sizeof(c1));
17641 0 : memset(&z1, 0, sizeof(z1));
17642 0 : memset(&means, 0, sizeof(means));
17643 0 : memset(&solver, 0, sizeof(solver));
17644 0 : memset(&rep, 0, sizeof(rep));
17645 0 : ae_vector_clear(s2);
17646 0 : ae_matrix_clear(v);
17647 0 : _sparsematrix_init(&xcrs, _state, ae_true);
17648 0 : ae_vector_init(&b1, 0, DT_REAL, _state, ae_true);
17649 0 : ae_vector_init(&c1, 0, DT_REAL, _state, ae_true);
17650 0 : ae_vector_init(&z1, 0, DT_REAL, _state, ae_true);
17651 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
17652 0 : _eigsubspacestate_init(&solver, _state, ae_true);
17653 0 : _eigsubspacereport_init(&rep, _state, ae_true);
17654 :
17655 0 : ae_assert(npoints>=0, "PCATruncatedSubspaceSparse: npoints<0", _state);
17656 0 : ae_assert(nvars>=1, "PCATruncatedSubspaceSparse: nvars<1", _state);
17657 0 : ae_assert(nneeded>0, "PCATruncatedSubspaceSparse: nneeded<1", _state);
17658 0 : ae_assert(nneeded<=nvars, "PCATruncatedSubspaceSparse: nneeded>nvars", _state);
17659 0 : ae_assert(maxits>=0, "PCATruncatedSubspaceSparse: maxits<0", _state);
17660 0 : ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "PCATruncatedSubspaceSparse: eps<0 or is not finite", _state);
17661 0 : if( npoints>0 )
17662 : {
17663 0 : ae_assert(sparsegetnrows(x, _state)==npoints, "PCATruncatedSubspaceSparse: rows(x)!=npoints", _state);
17664 0 : ae_assert(sparsegetncols(x, _state)==nvars, "PCATruncatedSubspaceSparse: cols(x)!=nvars", _state);
17665 : }
17666 :
17667 : /*
17668 : * Special case: NPoints=0
17669 : */
17670 0 : if( npoints==0 )
17671 : {
17672 0 : ae_vector_set_length(s2, nneeded, _state);
17673 0 : ae_matrix_set_length(v, nvars, nneeded, _state);
17674 0 : for(i=0; i<=nvars-1; i++)
17675 : {
17676 0 : s2->ptr.p_double[i] = (double)(0);
17677 : }
17678 0 : for(i=0; i<=nvars-1; i++)
17679 : {
17680 0 : for(j=0; j<=nneeded-1; j++)
17681 : {
17682 0 : if( i==j )
17683 : {
17684 0 : v->ptr.pp_double[i][j] = (double)(1);
17685 : }
17686 : else
17687 : {
17688 0 : v->ptr.pp_double[i][j] = (double)(0);
17689 : }
17690 : }
17691 : }
17692 0 : ae_frame_leave(_state);
17693 0 : return;
17694 : }
17695 :
17696 : /*
17697 : * If input data are not in CRS format, perform conversion to CRS
17698 : */
17699 0 : if( !sparseiscrs(x, _state) )
17700 : {
17701 0 : sparsecopytocrs(x, &xcrs, _state);
17702 0 : pcatruncatedsubspacesparse(&xcrs, npoints, nvars, nneeded, eps, maxits, s2, v, _state);
17703 0 : ae_frame_leave(_state);
17704 0 : return;
17705 : }
17706 :
17707 : /*
17708 : * Initialize parameters, prepare buffers
17709 : */
17710 0 : ae_vector_set_length(&b1, npoints, _state);
17711 0 : ae_vector_set_length(&z1, nvars, _state);
17712 0 : if( ae_fp_eq(eps,(double)(0))&&maxits==0 )
17713 : {
17714 0 : eps = 1.0E-6;
17715 : }
17716 0 : if( maxits==0 )
17717 : {
17718 0 : maxits = 50+2*nvars;
17719 : }
17720 :
17721 : /*
17722 : * Calculate mean values
17723 : */
17724 0 : vv = (double)1/(double)npoints;
17725 0 : for(i=0; i<=npoints-1; i++)
17726 : {
17727 0 : b1.ptr.p_double[i] = vv;
17728 : }
17729 0 : sparsemtv(x, &b1, &means, _state);
17730 :
17731 : /*
17732 : * Find eigenvalues with subspace iteration solver
17733 : */
17734 0 : eigsubspacecreate(nvars, nneeded, &solver, _state);
17735 0 : eigsubspacesetcond(&solver, eps, maxits, _state);
17736 0 : eigsubspaceoocstart(&solver, 0, _state);
17737 0 : while(eigsubspaceooccontinue(&solver, _state))
17738 : {
17739 0 : ae_assert(solver.requesttype==0, "PCATruncatedSubspace: integrity check failed", _state);
17740 0 : for(k=0; k<=solver.requestsize-1; k++)
17741 : {
17742 :
17743 : /*
17744 : * Calculate B1=(X-meansX)*Zk
17745 : */
17746 0 : ae_v_move(&z1.ptr.p_double[0], 1, &solver.x.ptr.pp_double[0][k], solver.x.stride, ae_v_len(0,nvars-1));
17747 0 : sparsemv(x, &z1, &b1, _state);
17748 0 : vv = ae_v_dotproduct(&solver.x.ptr.pp_double[0][k], solver.x.stride, &means.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
17749 0 : for(i=0; i<=npoints-1; i++)
17750 : {
17751 0 : b1.ptr.p_double[i] = b1.ptr.p_double[i]-vv;
17752 : }
17753 :
17754 : /*
17755 : * Calculate (X-meansX)^T*B1
17756 : */
17757 0 : sparsemtv(x, &b1, &c1, _state);
17758 0 : vv = (double)(0);
17759 0 : for(i=0; i<=npoints-1; i++)
17760 : {
17761 0 : vv = vv+b1.ptr.p_double[i];
17762 : }
17763 0 : for(j=0; j<=nvars-1; j++)
17764 : {
17765 0 : solver.ax.ptr.pp_double[j][k] = c1.ptr.p_double[j]-vv*means.ptr.p_double[j];
17766 : }
17767 : }
17768 : }
17769 0 : eigsubspaceoocstop(&solver, s2, v, &rep, _state);
17770 0 : if( npoints!=1 )
17771 : {
17772 0 : for(i=0; i<=nneeded-1; i++)
17773 : {
17774 0 : s2->ptr.p_double[i] = s2->ptr.p_double[i]/(npoints-1);
17775 : }
17776 : }
17777 0 : ae_frame_leave(_state);
17778 : }
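
/*************************************************************************
Illustrative usage sketch for pcatruncatedsubspacesparse(), added for
exposition; it is NOT part of the library. It assumes the standard
sparse-matrix helpers sparsecreate()/sparseset()/sparseconverttocrs()
declared elsewhere in ALGLIB, and an already initialized ae_state.
Non-CRS input is accepted but copied internally, so the matrix is
converted to CRS up front here.
*************************************************************************/
static void example_pcatruncatedsubspacesparse(ae_state *st)
{
    sparsematrix x;
    ae_vector s2;
    ae_matrix v;

    /* sparse 3x2 dataset built in hash-table format, then converted */
    _sparsematrix_init(&x, st, ae_false);
    sparsecreate(3, 2, 4, &x, st);
    sparseset(&x, 0, 0, 1.0, st);
    sparseset(&x, 1, 0, 2.0, st);
    sparseset(&x, 2, 0, 3.0, st);
    sparseset(&x, 2, 1, 0.1, st);
    sparseconverttocrs(&x, st);

    ae_vector_init(&s2, 0, DT_REAL, st, ae_false);
    ae_matrix_init(&v, 0, 0, DT_REAL, st, ae_false);

    /* one principal direction, eps=1.0E-4, no explicit iteration limit
       (maxits=0 is replaced internally by 50+2*NVars, see NOTE above) */
    pcatruncatedsubspacesparse(&x, 3, 2, 1, 1.0E-4, 0, &s2, &v, st);

    ae_matrix_clear(&v);
    ae_vector_clear(&s2);
    _sparsematrix_clear(&x);
}
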
17779 :
17780 :
17781 : #endif
17782 : #if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
17783 :
17784 :
17785 : /*************************************************************************
17786 : This set of routines (DSErrAllocate, DSErrAccumulate, DSErrFinish)
17787 : calculates different error functions (classification error, cross-entropy,
17788 : rms, avg, avg.rel errors).
17789 :
17790 : 1. DSErrAllocate prepares buffer.
17791 : 2. DSErrAccumulate accumulates individual errors:
17792 : * Y contains predicted output (posterior probabilities for classification)
17793 : * DesiredY contains desired output (class number for classification)
17794 : 3. DSErrFinish outputs results:
17795 : * Buf[0] contains relative classification error (zero for regression tasks)
17796 : * Buf[1] contains avg. cross-entropy (zero for regression tasks)
17797 : * Buf[2] contains rms error (regression, classification)
17798 : * Buf[3] contains average error (regression, classification)
17799 : * Buf[4] contains average relative error (regression, classification)
17800 :
17801 : NOTES(1):
17802 : "NClasses>0" means that we have classification task.
17803 :     "NClasses>0" means that we have a classification task.
17804 :     "NClasses<0" means a regression task with -NClasses real outputs.
17805 : NOTES(2):
17806 : rms. avg, avg.rel errors for classification tasks are interpreted as
17807 :     rms, avg, avg.rel errors for classification tasks are interpreted as
17808 : by training/test set.
17809 :
17810 : -- ALGLIB --
17811 : Copyright 11.01.2009 by Bochkanov Sergey
17812 : *************************************************************************/
17813 0 : void dserrallocate(ae_int_t nclasses,
17814 : /* Real */ ae_vector* buf,
17815 : ae_state *_state)
17816 : {
17817 :
17818 0 : ae_vector_clear(buf);
17819 :
17820 0 : ae_vector_set_length(buf, 7+1, _state);
17821 0 : buf->ptr.p_double[0] = (double)(0);
17822 0 : buf->ptr.p_double[1] = (double)(0);
17823 0 : buf->ptr.p_double[2] = (double)(0);
17824 0 : buf->ptr.p_double[3] = (double)(0);
17825 0 : buf->ptr.p_double[4] = (double)(0);
17826 0 : buf->ptr.p_double[5] = (double)(nclasses);
17827 0 : buf->ptr.p_double[6] = (double)(0);
17828 0 : buf->ptr.p_double[7] = (double)(0);
17829 0 : }
17830 :
17831 :
17832 : /*************************************************************************
17833 : See DSErrAllocate for comments on this routine.
17834 :
17835 : -- ALGLIB --
17836 : Copyright 11.01.2009 by Bochkanov Sergey
17837 : *************************************************************************/
17838 0 : void dserraccumulate(/* Real */ ae_vector* buf,
17839 : /* Real */ ae_vector* y,
17840 : /* Real */ ae_vector* desiredy,
17841 : ae_state *_state)
17842 : {
17843 : ae_int_t nclasses;
17844 : ae_int_t nout;
17845 : ae_int_t offs;
17846 : ae_int_t mmax;
17847 : ae_int_t rmax;
17848 : ae_int_t j;
17849 : double v;
17850 : double ev;
17851 :
17852 :
17853 0 : offs = 5;
17854 0 : nclasses = ae_round(buf->ptr.p_double[offs], _state);
17855 0 : if( nclasses>0 )
17856 : {
17857 :
17858 : /*
17859 : * Classification
17860 : */
17861 0 : rmax = ae_round(desiredy->ptr.p_double[0], _state);
17862 0 : mmax = 0;
17863 0 : for(j=1; j<=nclasses-1; j++)
17864 : {
17865 0 : if( ae_fp_greater(y->ptr.p_double[j],y->ptr.p_double[mmax]) )
17866 : {
17867 0 : mmax = j;
17868 : }
17869 : }
17870 0 : if( mmax!=rmax )
17871 : {
17872 0 : buf->ptr.p_double[0] = buf->ptr.p_double[0]+1;
17873 : }
17874 0 : if( ae_fp_greater(y->ptr.p_double[rmax],(double)(0)) )
17875 : {
17876 0 : buf->ptr.p_double[1] = buf->ptr.p_double[1]-ae_log(y->ptr.p_double[rmax], _state);
17877 : }
17878 : else
17879 : {
17880 0 : buf->ptr.p_double[1] = buf->ptr.p_double[1]+ae_log(ae_maxrealnumber, _state);
17881 : }
17882 0 : for(j=0; j<=nclasses-1; j++)
17883 : {
17884 0 : v = y->ptr.p_double[j];
17885 0 : if( j==rmax )
17886 : {
17887 0 : ev = (double)(1);
17888 : }
17889 : else
17890 : {
17891 0 : ev = (double)(0);
17892 : }
17893 0 : buf->ptr.p_double[2] = buf->ptr.p_double[2]+ae_sqr(v-ev, _state);
17894 0 : buf->ptr.p_double[3] = buf->ptr.p_double[3]+ae_fabs(v-ev, _state);
17895 0 : if( ae_fp_neq(ev,(double)(0)) )
17896 : {
17897 0 : buf->ptr.p_double[4] = buf->ptr.p_double[4]+ae_fabs((v-ev)/ev, _state);
17898 0 : buf->ptr.p_double[offs+2] = buf->ptr.p_double[offs+2]+1;
17899 : }
17900 : }
17901 0 : buf->ptr.p_double[offs+1] = buf->ptr.p_double[offs+1]+1;
17902 : }
17903 : else
17904 : {
17905 :
17906 : /*
17907 : * Regression
17908 : */
17909 0 : nout = -nclasses;
17910 0 : rmax = 0;
17911 0 : for(j=1; j<=nout-1; j++)
17912 : {
17913 0 : if( ae_fp_greater(desiredy->ptr.p_double[j],desiredy->ptr.p_double[rmax]) )
17914 : {
17915 0 : rmax = j;
17916 : }
17917 : }
17918 0 : mmax = 0;
17919 0 : for(j=1; j<=nout-1; j++)
17920 : {
17921 0 : if( ae_fp_greater(y->ptr.p_double[j],y->ptr.p_double[mmax]) )
17922 : {
17923 0 : mmax = j;
17924 : }
17925 : }
17926 0 : if( mmax!=rmax )
17927 : {
17928 0 : buf->ptr.p_double[0] = buf->ptr.p_double[0]+1;
17929 : }
17930 0 : for(j=0; j<=nout-1; j++)
17931 : {
17932 0 : v = y->ptr.p_double[j];
17933 0 : ev = desiredy->ptr.p_double[j];
17934 0 : buf->ptr.p_double[2] = buf->ptr.p_double[2]+ae_sqr(v-ev, _state);
17935 0 : buf->ptr.p_double[3] = buf->ptr.p_double[3]+ae_fabs(v-ev, _state);
17936 0 : if( ae_fp_neq(ev,(double)(0)) )
17937 : {
17938 0 : buf->ptr.p_double[4] = buf->ptr.p_double[4]+ae_fabs((v-ev)/ev, _state);
17939 0 : buf->ptr.p_double[offs+2] = buf->ptr.p_double[offs+2]+1;
17940 : }
17941 : }
17942 0 : buf->ptr.p_double[offs+1] = buf->ptr.p_double[offs+1]+1;
17943 : }
17944 0 : }
17945 :
17946 :
17947 : /*************************************************************************
17948 : See DSErrAllocate for comments on this routine.
17949 :
17950 : -- ALGLIB --
17951 : Copyright 11.01.2009 by Bochkanov Sergey
17952 : *************************************************************************/
17953 0 : void dserrfinish(/* Real */ ae_vector* buf, ae_state *_state)
17954 : {
17955 : ae_int_t nout;
17956 : ae_int_t offs;
17957 :
17958 :
17959 0 : offs = 5;
17960 0 : nout = ae_iabs(ae_round(buf->ptr.p_double[offs], _state), _state);
17961 0 : if( ae_fp_neq(buf->ptr.p_double[offs+1],(double)(0)) )
17962 : {
17963 0 : buf->ptr.p_double[0] = buf->ptr.p_double[0]/buf->ptr.p_double[offs+1];
17964 0 : buf->ptr.p_double[1] = buf->ptr.p_double[1]/buf->ptr.p_double[offs+1];
17965 0 : buf->ptr.p_double[2] = ae_sqrt(buf->ptr.p_double[2]/(nout*buf->ptr.p_double[offs+1]), _state);
17966 0 : buf->ptr.p_double[3] = buf->ptr.p_double[3]/(nout*buf->ptr.p_double[offs+1]);
17967 : }
17968 0 : if( ae_fp_neq(buf->ptr.p_double[offs+2],(double)(0)) )
17969 : {
17970 0 : buf->ptr.p_double[4] = buf->ptr.p_double[4]/buf->ptr.p_double[offs+2];
17971 : }
17972 0 : }
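
/*************************************************************************
Illustrative sketch of the DSErrAllocate/DSErrAccumulate/DSErrFinish
protocol described above, added for exposition; it is NOT part of the
library and uses a hypothetical two-class dataset. For classification,
DesiredY[0] holds the true class index while Y holds the posterior
probabilities. The caller supplies an initialized ae_state.
*************************************************************************/
static void example_dserr(ae_state *st)
{
    ae_vector buf, y, desiredy;

    ae_vector_init(&buf, 0, DT_REAL, st, ae_false);
    ae_vector_init(&y, 2, DT_REAL, st, ae_false);
    ae_vector_init(&desiredy, 1, DT_REAL, st, ae_false);

    /* 1. prepare the accumulator for a 2-class problem */
    dserrallocate(2, &buf, st);

    /* 2. accumulate two samples: one correct, one misclassified */
    y.ptr.p_double[0] = 0.9; y.ptr.p_double[1] = 0.1;
    desiredy.ptr.p_double[0] = (double)(0);       /* true class 0 */
    dserraccumulate(&buf, &y, &desiredy, st);
    y.ptr.p_double[0] = 0.4; y.ptr.p_double[1] = 0.6;
    desiredy.ptr.p_double[0] = (double)(0);       /* misclassified */
    dserraccumulate(&buf, &y, &desiredy, st);

    /* 3. finalize: buf[0]==0.5 (relative classification error),
       buf[1] is avg. cross-entropy, buf[2..4] are rms/avg/avg.rel */
    dserrfinish(&buf, st);

    ae_vector_clear(&desiredy);
    ae_vector_clear(&y);
    ae_vector_clear(&buf);
}
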
17973 :
17974 :
17975 : /*************************************************************************
17976 :
17977 : -- ALGLIB --
17978 : Copyright 19.05.2008 by Bochkanov Sergey
17979 : *************************************************************************/
17980 0 : void dsnormalize(/* Real */ ae_matrix* xy,
17981 : ae_int_t npoints,
17982 : ae_int_t nvars,
17983 : ae_int_t* info,
17984 : /* Real */ ae_vector* means,
17985 : /* Real */ ae_vector* sigmas,
17986 : ae_state *_state)
17987 : {
17988 : ae_frame _frame_block;
17989 : ae_int_t i;
17990 : ae_int_t j;
17991 : ae_vector tmp;
17992 : double mean;
17993 : double variance;
17994 : double skewness;
17995 : double kurtosis;
17996 :
17997 0 : ae_frame_make(_state, &_frame_block);
17998 0 : memset(&tmp, 0, sizeof(tmp));
17999 0 : *info = 0;
18000 0 : ae_vector_clear(means);
18001 0 : ae_vector_clear(sigmas);
18002 0 : ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
18003 :
18004 :
18005 : /*
18006 : * Test parameters
18007 : */
18008 0 : if( npoints<=0||nvars<1 )
18009 : {
18010 0 : *info = -1;
18011 0 : ae_frame_leave(_state);
18012 0 : return;
18013 : }
18014 0 : *info = 1;
18015 :
18016 : /*
18017 :      * Standardization
18018 : */
18019 0 : ae_vector_set_length(means, nvars-1+1, _state);
18020 0 : ae_vector_set_length(sigmas, nvars-1+1, _state);
18021 0 : ae_vector_set_length(&tmp, npoints-1+1, _state);
18022 0 : for(j=0; j<=nvars-1; j++)
18023 : {
18024 0 : ae_v_move(&tmp.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
18025 0 : samplemoments(&tmp, npoints, &mean, &variance, &skewness, &kurtosis, _state);
18026 0 : means->ptr.p_double[j] = mean;
18027 0 : sigmas->ptr.p_double[j] = ae_sqrt(variance, _state);
18028 0 : if( ae_fp_eq(sigmas->ptr.p_double[j],(double)(0)) )
18029 : {
18030 0 : sigmas->ptr.p_double[j] = (double)(1);
18031 : }
18032 0 : for(i=0; i<=npoints-1; i++)
18033 : {
18034 0 : xy->ptr.pp_double[i][j] = (xy->ptr.pp_double[i][j]-means->ptr.p_double[j])/sigmas->ptr.p_double[j];
18035 : }
18036 : }
18037 0 : ae_frame_leave(_state);
18038 : }
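
/*************************************************************************
Illustrative usage sketch for dsnormalize(), added for exposition; it is
NOT part of the library, and the caller supplies an initialized ae_state.
After the call each column of XY is centered to zero mean and scaled to
unit sigma; a constant column gets its sigma forced to 1, so it is merely
centered.
*************************************************************************/
static void example_dsnormalize(ae_state *st)
{
    ae_matrix xy;
    ae_vector means, sigmas;
    ae_int_t info;

    /* 3 points, 2 variables; the second column is constant */
    ae_matrix_init(&xy, 3, 2, DT_REAL, st, ae_false);
    xy.ptr.pp_double[0][0] = 1.0; xy.ptr.pp_double[0][1] = 10.0;
    xy.ptr.pp_double[1][0] = 2.0; xy.ptr.pp_double[1][1] = 10.0;
    xy.ptr.pp_double[2][0] = 3.0; xy.ptr.pp_double[2][1] = 10.0;
    ae_vector_init(&means, 0, DT_REAL, st, ae_false);
    ae_vector_init(&sigmas, 0, DT_REAL, st, ae_false);

    dsnormalize(&xy, 3, 2, &info, &means, &sigmas, st);
    /* info==1; means=={2,10}, sigmas=={1,1} (sample sigma of {1,2,3}
       is exactly 1, and the constant column's sigma is forced to 1) */

    ae_vector_clear(&sigmas);
    ae_vector_clear(&means);
    ae_matrix_clear(&xy);
}
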
18039 :
18040 :
18041 : /*************************************************************************
18042 :
18043 : -- ALGLIB --
18044 : Copyright 19.05.2008 by Bochkanov Sergey
18045 : *************************************************************************/
18046 0 : void dsnormalizec(/* Real */ ae_matrix* xy,
18047 : ae_int_t npoints,
18048 : ae_int_t nvars,
18049 : ae_int_t* info,
18050 : /* Real */ ae_vector* means,
18051 : /* Real */ ae_vector* sigmas,
18052 : ae_state *_state)
18053 : {
18054 : ae_frame _frame_block;
18055 : ae_int_t j;
18056 : ae_vector tmp;
18057 : double mean;
18058 : double variance;
18059 : double skewness;
18060 : double kurtosis;
18061 :
18062 0 : ae_frame_make(_state, &_frame_block);
18063 0 : memset(&tmp, 0, sizeof(tmp));
18064 0 : *info = 0;
18065 0 : ae_vector_clear(means);
18066 0 : ae_vector_clear(sigmas);
18067 0 : ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
18068 :
18069 :
18070 : /*
18071 : * Test parameters
18072 : */
18073 0 : if( npoints<=0||nvars<1 )
18074 : {
18075 0 : *info = -1;
18076 0 : ae_frame_leave(_state);
18077 0 : return;
18078 : }
18079 0 : *info = 1;
18080 :
18081 : /*
18082 :      * Standardization
18083 : */
18084 0 : ae_vector_set_length(means, nvars-1+1, _state);
18085 0 : ae_vector_set_length(sigmas, nvars-1+1, _state);
18086 0 : ae_vector_set_length(&tmp, npoints-1+1, _state);
18087 0 : for(j=0; j<=nvars-1; j++)
18088 : {
18089 0 : ae_v_move(&tmp.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
18090 0 : samplemoments(&tmp, npoints, &mean, &variance, &skewness, &kurtosis, _state);
18091 0 : means->ptr.p_double[j] = mean;
18092 0 : sigmas->ptr.p_double[j] = ae_sqrt(variance, _state);
18093 0 : if( ae_fp_eq(sigmas->ptr.p_double[j],(double)(0)) )
18094 : {
18095 0 : sigmas->ptr.p_double[j] = (double)(1);
18096 : }
18097 : }
18098 0 : ae_frame_leave(_state);
18099 : }
18100 :
18101 :
18102 : /*************************************************************************
18103 :
18104 : -- ALGLIB --
18105 : Copyright 19.05.2008 by Bochkanov Sergey
18106 : *************************************************************************/
18107 0 : double dsgetmeanmindistance(/* Real */ ae_matrix* xy,
18108 : ae_int_t npoints,
18109 : ae_int_t nvars,
18110 : ae_state *_state)
18111 : {
18112 : ae_frame _frame_block;
18113 : ae_int_t i;
18114 : ae_int_t j;
18115 : ae_vector tmp;
18116 : ae_vector tmp2;
18117 : double v;
18118 : double result;
18119 :
18120 0 : ae_frame_make(_state, &_frame_block);
18121 0 : memset(&tmp, 0, sizeof(tmp));
18122 0 : memset(&tmp2, 0, sizeof(tmp2));
18123 0 : ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
18124 0 : ae_vector_init(&tmp2, 0, DT_REAL, _state, ae_true);
18125 :
18126 :
18127 : /*
18128 : * Test parameters
18129 : */
18130 0 : if( npoints<=0||nvars<1 )
18131 : {
18132 0 : result = (double)(0);
18133 0 : ae_frame_leave(_state);
18134 0 : return result;
18135 : }
18136 :
18137 : /*
18138 : * Process
18139 : */
18140 0 : ae_vector_set_length(&tmp, npoints-1+1, _state);
18141 0 : for(i=0; i<=npoints-1; i++)
18142 : {
18143 0 : tmp.ptr.p_double[i] = ae_maxrealnumber;
18144 : }
18145 0 : ae_vector_set_length(&tmp2, nvars-1+1, _state);
18146 0 : for(i=0; i<=npoints-1; i++)
18147 : {
18148 0 : for(j=i+1; j<=npoints-1; j++)
18149 : {
18150 0 : ae_v_move(&tmp2.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
18151 0 : ae_v_sub(&tmp2.ptr.p_double[0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
18152 0 : v = ae_v_dotproduct(&tmp2.ptr.p_double[0], 1, &tmp2.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
18153 0 : v = ae_sqrt(v, _state);
18154 0 : tmp.ptr.p_double[i] = ae_minreal(tmp.ptr.p_double[i], v, _state);
18155 0 : tmp.ptr.p_double[j] = ae_minreal(tmp.ptr.p_double[j], v, _state);
18156 : }
18157 : }
18158 0 : result = (double)(0);
18159 0 : for(i=0; i<=npoints-1; i++)
18160 : {
18161 0 : result = result+tmp.ptr.p_double[i]/npoints;
18162 : }
18163 0 : ae_frame_leave(_state);
18164 0 : return result;
18165 : }
18166 :
18167 :
18168 : /*************************************************************************
18169 :
18170 : -- ALGLIB --
18171 : Copyright 19.05.2008 by Bochkanov Sergey
18172 : *************************************************************************/
18173 0 : void dstie(/* Real */ ae_vector* a,
18174 : ae_int_t n,
18175 : /* Integer */ ae_vector* ties,
18176 : ae_int_t* tiecount,
18177 : /* Integer */ ae_vector* p1,
18178 : /* Integer */ ae_vector* p2,
18179 : ae_state *_state)
18180 : {
18181 : ae_frame _frame_block;
18182 : ae_int_t i;
18183 : ae_int_t k;
18184 : ae_vector tmp;
18185 :
18186 0 : ae_frame_make(_state, &_frame_block);
18187 0 : memset(&tmp, 0, sizeof(tmp));
18188 0 : ae_vector_clear(ties);
18189 0 : *tiecount = 0;
18190 0 : ae_vector_clear(p1);
18191 0 : ae_vector_clear(p2);
18192 0 : ae_vector_init(&tmp, 0, DT_INT, _state, ae_true);
18193 :
18194 :
18195 : /*
18196 : * Special case
18197 : */
18198 0 : if( n<=0 )
18199 : {
18200 0 : *tiecount = 0;
18201 0 : ae_frame_leave(_state);
18202 0 : return;
18203 : }
18204 :
18205 : /*
18206 : * Sort A
18207 : */
18208 0 : tagsort(a, n, p1, p2, _state);
18209 :
18210 : /*
18211 : * Process ties
18212 : */
18213 0 : *tiecount = 1;
18214 0 : for(i=1; i<=n-1; i++)
18215 : {
18216 0 : if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
18217 : {
18218 0 : *tiecount = *tiecount+1;
18219 : }
18220 : }
18221 0 : ae_vector_set_length(ties, *tiecount+1, _state);
18222 0 : ties->ptr.p_int[0] = 0;
18223 0 : k = 1;
18224 0 : for(i=1; i<=n-1; i++)
18225 : {
18226 0 : if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
18227 : {
18228 0 : ties->ptr.p_int[k] = i;
18229 0 : k = k+1;
18230 : }
18231 : }
18232 0 : ties->ptr.p_int[*tiecount] = n;
18233 0 : ae_frame_leave(_state);
18234 : }
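
/*************************************************************************
Illustrative sketch of dstie() output, added for exposition; it is NOT
part of the library, and the caller supplies an initialized ae_state.
Ties[] delimits runs of equal values in the sorted array: run K occupies
positions Ties[K]..Ties[K+1]-1, while P1/P2 describe the sorting
permutation produced by tagsort().
*************************************************************************/
static void example_dstie(ae_state *st)
{
    ae_vector a, ties, p1, p2;
    ae_int_t tiecount;

    /* a = [2, 1, 2, 3]; note that dstie() sorts A in place */
    ae_vector_init(&a, 4, DT_REAL, st, ae_false);
    a.ptr.p_double[0] = 2.0;
    a.ptr.p_double[1] = 1.0;
    a.ptr.p_double[2] = 2.0;
    a.ptr.p_double[3] = 3.0;
    ae_vector_init(&ties, 0, DT_INT, st, ae_false);
    ae_vector_init(&p1, 0, DT_INT, st, ae_false);
    ae_vector_init(&p2, 0, DT_INT, st, ae_false);

    dstie(&a, 4, &ties, &tiecount, &p1, &p2, st);
    /* sorted a == [1,2,2,3]; tiecount==3 distinct values;
       ties == [0,1,3,4], i.e. the run of 2's occupies positions 1..2 */

    ae_vector_clear(&p2);
    ae_vector_clear(&p1);
    ae_vector_clear(&ties);
    ae_vector_clear(&a);
}
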
18235 :
18236 :
18237 : /*************************************************************************
18238 :
18239 : -- ALGLIB --
18240 : Copyright 11.12.2008 by Bochkanov Sergey
18241 : *************************************************************************/
18242 0 : void dstiefasti(/* Real */ ae_vector* a,
18243 : /* Integer */ ae_vector* b,
18244 : ae_int_t n,
18245 : /* Integer */ ae_vector* ties,
18246 : ae_int_t* tiecount,
18247 : /* Real */ ae_vector* bufr,
18248 : /* Integer */ ae_vector* bufi,
18249 : ae_state *_state)
18250 : {
18251 : ae_frame _frame_block;
18252 : ae_int_t i;
18253 : ae_int_t k;
18254 : ae_vector tmp;
18255 :
18256 0 : ae_frame_make(_state, &_frame_block);
18257 0 : memset(&tmp, 0, sizeof(tmp));
18258 0 : *tiecount = 0;
18259 0 : ae_vector_init(&tmp, 0, DT_INT, _state, ae_true);
18260 :
18261 :
18262 : /*
18263 : * Special case
18264 : */
18265 0 : if( n<=0 )
18266 : {
18267 0 : *tiecount = 0;
18268 0 : ae_frame_leave(_state);
18269 0 : return;
18270 : }
18271 :
18272 : /*
18273 : * Sort A
18274 : */
18275 0 : tagsortfasti(a, b, bufr, bufi, n, _state);
18276 :
18277 : /*
18278 : * Process ties
18279 : */
18280 0 : ties->ptr.p_int[0] = 0;
18281 0 : k = 1;
18282 0 : for(i=1; i<=n-1; i++)
18283 : {
18284 0 : if( ae_fp_neq(a->ptr.p_double[i],a->ptr.p_double[i-1]) )
18285 : {
18286 0 : ties->ptr.p_int[k] = i;
18287 0 : k = k+1;
18288 : }
18289 : }
18290 0 : ties->ptr.p_int[k] = n;
18291 0 : *tiecount = k;
18292 0 : ae_frame_leave(_state);
18293 : }
18294 :
18295 :
18296 : /*************************************************************************
18297 : Optimal binary classification
18298 :
18299 : The algorithm finds the optimal (i.e. minimal cross-entropy) binary partition.
18300 : Internal subroutine.
18301 :
18302 : INPUT PARAMETERS:
18303 : A - array[0..N-1], variable
18304 : C - array[0..N-1], class numbers (0 or 1).
18305 : N - array size
18306 :
18307 : OUTPUT PARAMETERS:
18308 :     Info    -   completion code:
18309 :                 * -3, all values of A[] are the same (partition is impossible)
18310 :                 * -2, one of C[] is incorrect (<0, >1)
18311 :                 * -1, incorrect parameters were passed (N<=0).
18312 :                 *  1, OK
18313 :     Threshold-  partition boundary. Left part contains values which are
18314 : strictly less than Threshold. Right part contains values
18315 : which are greater than or equal to Threshold.
18316 : PAL, PBL- probabilities P(0|v<Threshold) and P(1|v<Threshold)
18317 : PAR, PBR- probabilities P(0|v>=Threshold) and P(1|v>=Threshold)
18318 : CVE - cross-validation estimate of cross-entropy
18319 :
18320 : -- ALGLIB --
18321 : Copyright 22.05.2008 by Bochkanov Sergey
18322 : *************************************************************************/
18323 0 : void dsoptimalsplit2(/* Real */ ae_vector* a,
18324 : /* Integer */ ae_vector* c,
18325 : ae_int_t n,
18326 : ae_int_t* info,
18327 : double* threshold,
18328 : double* pal,
18329 : double* pbl,
18330 : double* par,
18331 : double* pbr,
18332 : double* cve,
18333 : ae_state *_state)
18334 : {
18335 : ae_frame _frame_block;
18336 : ae_vector _a;
18337 : ae_vector _c;
18338 : ae_int_t i;
18339 : ae_int_t t;
18340 : double s;
18341 : ae_vector ties;
18342 : ae_int_t tiecount;
18343 : ae_vector p1;
18344 : ae_vector p2;
18345 : ae_int_t k;
18346 : ae_int_t koptimal;
18347 : double pak;
18348 : double pbk;
18349 : double cvoptimal;
18350 : double cv;
18351 :
18352 0 : ae_frame_make(_state, &_frame_block);
18353 0 : memset(&_a, 0, sizeof(_a));
18354 0 : memset(&_c, 0, sizeof(_c));
18355 0 : memset(&ties, 0, sizeof(ties));
18356 0 : memset(&p1, 0, sizeof(p1));
18357 0 : memset(&p2, 0, sizeof(p2));
18358 0 : ae_vector_init_copy(&_a, a, _state, ae_true);
18359 0 : a = &_a;
18360 0 : ae_vector_init_copy(&_c, c, _state, ae_true);
18361 0 : c = &_c;
18362 0 : *info = 0;
18363 0 : *threshold = 0;
18364 0 : *pal = 0;
18365 0 : *pbl = 0;
18366 0 : *par = 0;
18367 0 : *pbr = 0;
18368 0 : *cve = 0;
18369 0 : ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
18370 0 : ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
18371 0 : ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
18372 :
18373 :
18374 : /*
18375 : * Test for errors in inputs
18376 : */
18377 0 : if( n<=0 )
18378 : {
18379 0 : *info = -1;
18380 0 : ae_frame_leave(_state);
18381 0 : return;
18382 : }
18383 0 : for(i=0; i<=n-1; i++)
18384 : {
18385 0 : if( c->ptr.p_int[i]!=0&&c->ptr.p_int[i]!=1 )
18386 : {
18387 0 : *info = -2;
18388 0 : ae_frame_leave(_state);
18389 0 : return;
18390 : }
18391 : }
18392 0 : *info = 1;
18393 :
18394 : /*
18395 : * Tie
18396 : */
18397 0 : dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
18398 0 : for(i=0; i<=n-1; i++)
18399 : {
18400 0 : if( p2.ptr.p_int[i]!=i )
18401 : {
18402 0 : t = c->ptr.p_int[i];
18403 0 : c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
18404 0 : c->ptr.p_int[p2.ptr.p_int[i]] = t;
18405 : }
18406 : }
18407 :
18408 : /*
18409 : * Special case: number of ties is 1.
18410 : *
18411 : * NOTE: we assume that P[i,j] equals to 0 or 1,
18412 : * intermediate values are not allowed.
18413 : */
18414 0 : if( tiecount==1 )
18415 : {
18416 0 : *info = -3;
18417 0 : ae_frame_leave(_state);
18418 0 : return;
18419 : }
18420 :
18421 : /*
18422 : * General case, number of ties > 1
18423 : *
18424 : * NOTE: we assume that P[i,j] equals to 0 or 1,
18425 : * intermediate values are not allowed.
18426 : */
18427 0 : *pal = (double)(0);
18428 0 : *pbl = (double)(0);
18429 0 : *par = (double)(0);
18430 0 : *pbr = (double)(0);
18431 0 : for(i=0; i<=n-1; i++)
18432 : {
18433 0 : if( c->ptr.p_int[i]==0 )
18434 : {
18435 0 : *par = *par+1;
18436 : }
18437 0 : if( c->ptr.p_int[i]==1 )
18438 : {
18439 0 : *pbr = *pbr+1;
18440 : }
18441 : }
18442 0 : koptimal = -1;
18443 0 : cvoptimal = ae_maxrealnumber;
18444 0 : for(k=0; k<=tiecount-2; k++)
18445 : {
18446 :
18447 : /*
18448 : * first, obtain information about K-th tie which is
18449 : * moved from R-part to L-part
18450 : */
18451 0 : pak = (double)(0);
18452 0 : pbk = (double)(0);
18453 0 : for(i=ties.ptr.p_int[k]; i<=ties.ptr.p_int[k+1]-1; i++)
18454 : {
18455 0 : if( c->ptr.p_int[i]==0 )
18456 : {
18457 0 : pak = pak+1;
18458 : }
18459 0 : if( c->ptr.p_int[i]==1 )
18460 : {
18461 0 : pbk = pbk+1;
18462 : }
18463 : }
18464 :
18465 : /*
18466 : * Calculate cross-validation CE
18467 : */
18468 0 : cv = (double)(0);
18469 0 : cv = cv-bdss_xlny(*pal+pak, (*pal+pak)/(*pal+pak+(*pbl)+pbk+1), _state);
18470 0 : cv = cv-bdss_xlny(*pbl+pbk, (*pbl+pbk)/(*pal+pak+1+(*pbl)+pbk), _state);
18471 0 : cv = cv-bdss_xlny(*par-pak, (*par-pak)/(*par-pak+(*pbr)-pbk+1), _state);
18472 0 : cv = cv-bdss_xlny(*pbr-pbk, (*pbr-pbk)/(*par-pak+1+(*pbr)-pbk), _state);
18473 :
18474 : /*
18475 : * Compare with best
18476 : */
18477 0 : if( ae_fp_less(cv,cvoptimal) )
18478 : {
18479 0 : cvoptimal = cv;
18480 0 : koptimal = k;
18481 : }
18482 :
18483 : /*
18484 : * update
18485 : */
18486 0 : *pal = *pal+pak;
18487 0 : *pbl = *pbl+pbk;
18488 0 : *par = *par-pak;
18489 0 : *pbr = *pbr-pbk;
18490 : }
18491 0 : *cve = cvoptimal;
18492 0 : *threshold = 0.5*(a->ptr.p_double[ties.ptr.p_int[koptimal]]+a->ptr.p_double[ties.ptr.p_int[koptimal+1]]);
18493 0 : *pal = (double)(0);
18494 0 : *pbl = (double)(0);
18495 0 : *par = (double)(0);
18496 0 : *pbr = (double)(0);
18497 0 : for(i=0; i<=n-1; i++)
18498 : {
18499 0 : if( ae_fp_less(a->ptr.p_double[i],*threshold) )
18500 : {
18501 0 : if( c->ptr.p_int[i]==0 )
18502 : {
18503 0 : *pal = *pal+1;
18504 : }
18505 : else
18506 : {
18507 0 : *pbl = *pbl+1;
18508 : }
18509 : }
18510 : else
18511 : {
18512 0 : if( c->ptr.p_int[i]==0 )
18513 : {
18514 0 : *par = *par+1;
18515 : }
18516 : else
18517 : {
18518 0 : *pbr = *pbr+1;
18519 : }
18520 : }
18521 : }
18522 0 : s = *pal+(*pbl);
18523 0 : *pal = *pal/s;
18524 0 : *pbl = *pbl/s;
18525 0 : s = *par+(*pbr);
18526 0 : *par = *par/s;
18527 0 : *pbr = *pbr/s;
18528 0 : ae_frame_leave(_state);
18529 : }
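
/*************************************************************************
Illustrative usage sketch for dsoptimalsplit2(), added for exposition; it
is NOT part of the library, and the caller supplies an initialized
ae_state. With two cleanly separated classes the threshold should land
midway between the adjacent class boundaries.
*************************************************************************/
static void example_dsoptimalsplit2(ae_state *st)
{
    ae_vector a, c;
    ae_int_t info;
    double threshold, pal, pbl, par, pbr, cve;

    /* class 0 at {1,2}, class 1 at {3,4} */
    ae_vector_init(&a, 4, DT_REAL, st, ae_false);
    ae_vector_init(&c, 4, DT_INT, st, ae_false);
    a.ptr.p_double[0] = 1.0; c.ptr.p_int[0] = 0;
    a.ptr.p_double[1] = 2.0; c.ptr.p_int[1] = 0;
    a.ptr.p_double[2] = 3.0; c.ptr.p_int[2] = 1;
    a.ptr.p_double[3] = 4.0; c.ptr.p_int[3] = 1;

    dsoptimalsplit2(&a, &c, 4, &info, &threshold,
        &pal, &pbl, &par, &pbr, &cve, st);
    /* expected: info==1, threshold==2.5 (=0.5*(2+3));
       left part is pure class 0 (pal==1, pbl==0) and
       right part is pure class 1 (par==0, pbr==1) */

    ae_vector_clear(&c);
    ae_vector_clear(&a);
}
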
18530 :
18531 :
18532 : /*************************************************************************
18533 : Optimal partition, internal subroutine. Fast version.
18534 :
18535 : Accepts:
18536 :     A           array[0..N-1]       array of attributes
18537 : C array[0..N-1] array of class labels
18538 : TiesBuf array[0..N] temporaries (ties)
18539 : CntBuf array[0..2*NC-1] temporaries (counts)
18540 : Alpha centering factor (0<=alpha<=1, recommended value - 0.05)
18541 : BufR array[0..N-1] temporaries
18542 : BufI array[0..N-1] temporaries
18543 :
18544 : Output:
18545 : Info error code (">0"=OK, "<0"=bad)
18546 : RMS training set RMS error
18547 : CVRMS leave-one-out RMS error
18548 :
18549 : Note:
18550 :     the content of all arrays is changed by the subroutine;
18551 :     it doesn't allocate temporaries.
18552 :
18553 : -- ALGLIB --
18554 : Copyright 11.12.2008 by Bochkanov Sergey
18555 : *************************************************************************/
18556 0 : void dsoptimalsplit2fast(/* Real */ ae_vector* a,
18557 : /* Integer */ ae_vector* c,
18558 : /* Integer */ ae_vector* tiesbuf,
18559 : /* Integer */ ae_vector* cntbuf,
18560 : /* Real */ ae_vector* bufr,
18561 : /* Integer */ ae_vector* bufi,
18562 : ae_int_t n,
18563 : ae_int_t nc,
18564 : double alpha,
18565 : ae_int_t* info,
18566 : double* threshold,
18567 : double* rms,
18568 : double* cvrms,
18569 : ae_state *_state)
18570 : {
18571 : ae_int_t i;
18572 : ae_int_t k;
18573 : ae_int_t cl;
18574 : ae_int_t tiecount;
18575 : double cbest;
18576 : double cc;
18577 : ae_int_t koptimal;
18578 : ae_int_t sl;
18579 : ae_int_t sr;
18580 : double v;
18581 : double w;
18582 : double x;
18583 :
18584 0 : *info = 0;
18585 0 : *threshold = 0;
18586 0 : *rms = 0;
18587 0 : *cvrms = 0;
18588 :
18589 :
18590 : /*
18591 : * Test for errors in inputs
18592 : */
18593 0 : if( n<=0||nc<2 )
18594 : {
18595 0 : *info = -1;
18596 0 : return;
18597 : }
18598 0 : for(i=0; i<=n-1; i++)
18599 : {
18600 0 : if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
18601 : {
18602 0 : *info = -2;
18603 0 : return;
18604 : }
18605 : }
18606 0 : *info = 1;
18607 :
18608 : /*
18609 : * Tie
18610 : */
18611 0 : dstiefasti(a, c, n, tiesbuf, &tiecount, bufr, bufi, _state);
18612 :
18613 : /*
18614 : * Special case: number of ties is 1.
18615 : */
18616 0 : if( tiecount==1 )
18617 : {
18618 0 : *info = -3;
18619 0 : return;
18620 : }
18621 :
18622 : /*
18623 : * General case, number of ties > 1
18624 : */
18625 0 : for(i=0; i<=2*nc-1; i++)
18626 : {
18627 0 : cntbuf->ptr.p_int[i] = 0;
18628 : }
18629 0 : for(i=0; i<=n-1; i++)
18630 : {
18631 0 : cntbuf->ptr.p_int[nc+c->ptr.p_int[i]] = cntbuf->ptr.p_int[nc+c->ptr.p_int[i]]+1;
18632 : }
18633 0 : koptimal = -1;
18634 0 : *threshold = a->ptr.p_double[n-1];
18635 0 : cbest = ae_maxrealnumber;
18636 0 : sl = 0;
18637 0 : sr = n;
18638 0 : for(k=0; k<=tiecount-2; k++)
18639 : {
18640 :
18641 : /*
18642 : * first, move Kth tie from right to left
18643 : */
18644 0 : for(i=tiesbuf->ptr.p_int[k]; i<=tiesbuf->ptr.p_int[k+1]-1; i++)
18645 : {
18646 0 : cl = c->ptr.p_int[i];
18647 0 : cntbuf->ptr.p_int[cl] = cntbuf->ptr.p_int[cl]+1;
18648 0 : cntbuf->ptr.p_int[nc+cl] = cntbuf->ptr.p_int[nc+cl]-1;
18649 : }
18650 0 : sl = sl+(tiesbuf->ptr.p_int[k+1]-tiesbuf->ptr.p_int[k]);
18651 0 : sr = sr-(tiesbuf->ptr.p_int[k+1]-tiesbuf->ptr.p_int[k]);
18652 :
18653 : /*
18654 : * Calculate RMS error
18655 : */
18656 0 : v = (double)(0);
18657 0 : for(i=0; i<=nc-1; i++)
18658 : {
18659 0 : w = (double)(cntbuf->ptr.p_int[i]);
18660 0 : v = v+w*ae_sqr(w/sl-1, _state);
18661 0 : v = v+(sl-w)*ae_sqr(w/sl, _state);
18662 0 : w = (double)(cntbuf->ptr.p_int[nc+i]);
18663 0 : v = v+w*ae_sqr(w/sr-1, _state);
18664 0 : v = v+(sr-w)*ae_sqr(w/sr, _state);
18665 : }
18666 0 : v = ae_sqrt(v/(nc*n), _state);
18667 :
18668 : /*
18669 : * Compare with best
18670 : */
18671 0 : x = (double)(2*sl)/(double)(sl+sr)-1;
18672 0 : cc = v*(1-alpha+alpha*ae_sqr(x, _state));
18673 0 : if( ae_fp_less(cc,cbest) )
18674 : {
18675 :
18676 : /*
18677 : * store split
18678 : */
18679 0 : *rms = v;
18680 0 : koptimal = k;
18681 0 : cbest = cc;
18682 :
18683 : /*
18684 : * calculate CVRMS error
18685 : */
18686 0 : *cvrms = (double)(0);
18687 0 : for(i=0; i<=nc-1; i++)
18688 : {
18689 0 : if( sl>1 )
18690 : {
18691 0 : w = (double)(cntbuf->ptr.p_int[i]);
18692 0 : *cvrms = *cvrms+w*ae_sqr((w-1)/(sl-1)-1, _state);
18693 0 : *cvrms = *cvrms+(sl-w)*ae_sqr(w/(sl-1), _state);
18694 : }
18695 : else
18696 : {
18697 0 : w = (double)(cntbuf->ptr.p_int[i]);
18698 0 : *cvrms = *cvrms+w*ae_sqr((double)1/(double)nc-1, _state);
18699 0 : *cvrms = *cvrms+(sl-w)*ae_sqr((double)1/(double)nc, _state);
18700 : }
18701 0 : if( sr>1 )
18702 : {
18703 0 : w = (double)(cntbuf->ptr.p_int[nc+i]);
18704 0 : *cvrms = *cvrms+w*ae_sqr((w-1)/(sr-1)-1, _state);
18705 0 : *cvrms = *cvrms+(sr-w)*ae_sqr(w/(sr-1), _state);
18706 : }
18707 : else
18708 : {
18709 0 : w = (double)(cntbuf->ptr.p_int[nc+i]);
18710 0 : *cvrms = *cvrms+w*ae_sqr((double)1/(double)nc-1, _state);
18711 0 : *cvrms = *cvrms+(sr-w)*ae_sqr((double)1/(double)nc, _state);
18712 : }
18713 : }
18714 0 : *cvrms = ae_sqrt(*cvrms/(nc*n), _state);
18715 : }
18716 : }
18717 :
18718 : /*
18719 : * Calculate threshold.
18720 :      * The code is slightly complicated because for some floating-point
18721 :      * inputs 0.5*(A+B) rounds exactly to A or B (when B-A is on the order of one ulp)
18722 : */
18723 0 : *threshold = 0.5*(a->ptr.p_double[tiesbuf->ptr.p_int[koptimal]]+a->ptr.p_double[tiesbuf->ptr.p_int[koptimal+1]]);
18724 0 : if( ae_fp_less_eq(*threshold,a->ptr.p_double[tiesbuf->ptr.p_int[koptimal]]) )
18725 : {
18726 0 : *threshold = a->ptr.p_double[tiesbuf->ptr.p_int[koptimal+1]];
18727 : }
18728 : }
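/*************************************************************************
Usage sketch for DSOptimalSplit2Fast above (hypothetical driver code, not
part of the ALGLIB sources). The buffers are preallocated to the sizes
stated in the comment above. Inside the routine the RMS score V of each
candidate split is multiplied by 1-Alpha+Alpha*X^2, where X in [-1,1]
measures how unbalanced the split is, so Alpha>0 penalizes splits which
put almost all samples on one side.
*************************************************************************/
static void example_dsoptimalsplit2fast(ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector a;
    ae_vector c;
    ae_vector tiesbuf;
    ae_vector cntbuf;
    ae_vector bufr;
    ae_vector bufi;
    ae_int_t n;
    ae_int_t nc;
    ae_int_t i;
    ae_int_t info;
    double threshold;
    double rms;
    double cvrms;

    ae_frame_make(_state, &_frame_block);
    n = 6;
    nc = 2;

    /* ae_vector_init() fully initializes the headers, so no memset is needed here */
    ae_vector_init(&a, n, DT_REAL, _state, ae_true);
    ae_vector_init(&c, n, DT_INT, _state, ae_true);
    ae_vector_init(&tiesbuf, n+1, DT_INT, _state, ae_true);
    ae_vector_init(&cntbuf, 2*nc, DT_INT, _state, ae_true);
    ae_vector_init(&bufr, n, DT_REAL, _state, ae_true);
    ae_vector_init(&bufi, n, DT_INT, _state, ae_true);

    /* toy data: class 0 at small attribute values, class 1 at large ones */
    for(i=0; i<=n-1; i++)
    {
        a.ptr.p_double[i] = (double)(i);
        c.ptr.p_int[i] = i<n/2 ? 0 : 1;
    }
    dsoptimalsplit2fast(&a, &c, &tiesbuf, &cntbuf, &bufr, &bufi, n, nc, 0.05, &info, &threshold, &rms, &cvrms, _state);

    /* on success Info>0; for this toy data Threshold lands near 2.5 */
    ae_frame_leave(_state);
}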
18729 :
18730 :
18731 : /*************************************************************************
18732 : Automatic non-optimal discretization, internal subroutine.
18733 :
18734 : -- ALGLIB --
18735 : Copyright 22.05.2008 by Bochkanov Sergey
18736 : *************************************************************************/
18737 0 : void dssplitk(/* Real */ ae_vector* a,
18738 : /* Integer */ ae_vector* c,
18739 : ae_int_t n,
18740 : ae_int_t nc,
18741 : ae_int_t kmax,
18742 : ae_int_t* info,
18743 : /* Real */ ae_vector* thresholds,
18744 : ae_int_t* ni,
18745 : double* cve,
18746 : ae_state *_state)
18747 : {
18748 : ae_frame _frame_block;
18749 : ae_vector _a;
18750 : ae_vector _c;
18751 : ae_int_t i;
18752 : ae_int_t j;
18753 : ae_int_t j1;
18754 : ae_int_t k;
18755 : ae_vector ties;
18756 : ae_int_t tiecount;
18757 : ae_vector p1;
18758 : ae_vector p2;
18759 : ae_vector cnt;
18760 : double v2;
18761 : ae_int_t bestk;
18762 : double bestcve;
18763 : ae_vector bestsizes;
18764 : double curcve;
18765 : ae_vector cursizes;
18766 :
18767 0 : ae_frame_make(_state, &_frame_block);
18768 0 : memset(&_a, 0, sizeof(_a));
18769 0 : memset(&_c, 0, sizeof(_c));
18770 0 : memset(&ties, 0, sizeof(ties));
18771 0 : memset(&p1, 0, sizeof(p1));
18772 0 : memset(&p2, 0, sizeof(p2));
18773 0 : memset(&cnt, 0, sizeof(cnt));
18774 0 : memset(&bestsizes, 0, sizeof(bestsizes));
18775 0 : memset(&cursizes, 0, sizeof(cursizes));
18776 0 : ae_vector_init_copy(&_a, a, _state, ae_true);
18777 0 : a = &_a;
18778 0 : ae_vector_init_copy(&_c, c, _state, ae_true);
18779 0 : c = &_c;
18780 0 : *info = 0;
18781 0 : ae_vector_clear(thresholds);
18782 0 : *ni = 0;
18783 0 : *cve = 0;
18784 0 : ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
18785 0 : ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
18786 0 : ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
18787 0 : ae_vector_init(&cnt, 0, DT_INT, _state, ae_true);
18788 0 : ae_vector_init(&bestsizes, 0, DT_INT, _state, ae_true);
18789 0 : ae_vector_init(&cursizes, 0, DT_INT, _state, ae_true);
18790 :
18791 :
18792 : /*
18793 : * Test for errors in inputs
18794 : */
18795 0 : if( (n<=0||nc<2)||kmax<2 )
18796 : {
18797 0 : *info = -1;
18798 0 : ae_frame_leave(_state);
18799 0 : return;
18800 : }
18801 0 : for(i=0; i<=n-1; i++)
18802 : {
18803 0 : if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
18804 : {
18805 0 : *info = -2;
18806 0 : ae_frame_leave(_state);
18807 0 : return;
18808 : }
18809 : }
18810 0 : *info = 1;
18811 :
18812 : /*
18813 : * Tie
18814 : */
18815 0 : dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
18816 0 : for(i=0; i<=n-1; i++)
18817 : {
18818 0 : if( p2.ptr.p_int[i]!=i )
18819 : {
18820 0 : k = c->ptr.p_int[i];
18821 0 : c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
18822 0 : c->ptr.p_int[p2.ptr.p_int[i]] = k;
18823 : }
18824 : }
18825 :
18826 : /*
18827 : * Special cases
18828 : */
18829 0 : if( tiecount==1 )
18830 : {
18831 0 : *info = -3;
18832 0 : ae_frame_leave(_state);
18833 0 : return;
18834 : }
18835 :
18836 : /*
18837 : * General case:
18838 : * 0. allocate arrays
18839 : */
18840 0 : kmax = ae_minint(kmax, tiecount, _state);
18841 0 : ae_vector_set_length(&bestsizes, kmax-1+1, _state);
18842 0 : ae_vector_set_length(&cursizes, kmax-1+1, _state);
18843 0 : ae_vector_set_length(&cnt, nc-1+1, _state);
18844 :
18845 : /*
18846 : * General case:
18847 : * 1. prepare "weak" solution (two subintervals, divided at median)
18848 : */
18849 0 : v2 = ae_maxrealnumber;
18850 0 : j = -1;
18851 0 : for(i=1; i<=tiecount-1; i++)
18852 : {
18853 0 : if( ae_fp_less(ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state),v2) )
18854 : {
18855 0 : v2 = ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state);
18856 0 : j = i;
18857 : }
18858 : }
18859 0 : ae_assert(j>0, "DSSplitK: internal error #1!", _state);
18860 0 : bestk = 2;
18861 0 : bestsizes.ptr.p_int[0] = ties.ptr.p_int[j];
18862 0 : bestsizes.ptr.p_int[1] = n-j;
18863 0 : bestcve = (double)(0);
18864 0 : for(i=0; i<=nc-1; i++)
18865 : {
18866 0 : cnt.ptr.p_int[i] = 0;
18867 : }
18868 0 : for(i=0; i<=j-1; i++)
18869 : {
18870 0 : bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
18871 : }
18872 0 : bestcve = bestcve+bdss_getcv(&cnt, nc, _state);
18873 0 : for(i=0; i<=nc-1; i++)
18874 : {
18875 0 : cnt.ptr.p_int[i] = 0;
18876 : }
18877 0 : for(i=j; i<=tiecount-1; i++)
18878 : {
18879 0 : bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
18880 : }
18881 0 : bestcve = bestcve+bdss_getcv(&cnt, nc, _state);
18882 :
18883 : /*
18884 : * General case:
18885 : * 2. Use greedy algorithm to find sub-optimal split in O(KMax*N) time
18886 : */
18887 0 : for(k=2; k<=kmax; k++)
18888 : {
18889 :
18890 : /*
18891 : * Prepare greedy K-interval split
18892 : */
18893 0 : for(i=0; i<=k-1; i++)
18894 : {
18895 0 : cursizes.ptr.p_int[i] = 0;
18896 : }
18897 0 : i = 0;
18898 0 : j = 0;
18899 0 : while(j<=tiecount-1&&i<=k-1)
18900 : {
18901 :
18902 : /*
18903 : * Rule: I-th bin is empty, fill it
18904 : */
18905 0 : if( cursizes.ptr.p_int[i]==0 )
18906 : {
18907 0 : cursizes.ptr.p_int[i] = ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
18908 0 : j = j+1;
18909 0 : continue;
18910 : }
18911 :
18912 : /*
18913 : * Rule: (K-1-I) bins left, (K-1-I) ties left (1 tie per bin); next bin
18914 : */
18915 0 : if( tiecount-j==k-1-i )
18916 : {
18917 0 : i = i+1;
18918 0 : continue;
18919 : }
18920 :
18921 : /*
18922 : * Rule: last bin, always place in current
18923 : */
18924 0 : if( i==k-1 )
18925 : {
18926 0 : cursizes.ptr.p_int[i] = cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
18927 0 : j = j+1;
18928 0 : continue;
18929 : }
18930 :
18931 : /*
18932 : * Place J-th tie in I-th bin, or leave for I+1-th bin.
18933 : */
18934 0 : if( ae_fp_less(ae_fabs(cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j]-(double)n/(double)k, _state),ae_fabs(cursizes.ptr.p_int[i]-(double)n/(double)k, _state)) )
18935 : {
18936 0 : cursizes.ptr.p_int[i] = cursizes.ptr.p_int[i]+ties.ptr.p_int[j+1]-ties.ptr.p_int[j];
18937 0 : j = j+1;
18938 : }
18939 : else
18940 : {
18941 0 : i = i+1;
18942 : }
18943 : }
18944 0 : ae_assert(cursizes.ptr.p_int[k-1]!=0&&j==tiecount, "DSSplitK: internal error #2", _state);
18945 :
18946 : /*
18947 : * Calculate CVE
18948 : */
18949 0 : curcve = (double)(0);
18950 0 : j = 0;
18951 0 : for(i=0; i<=k-1; i++)
18952 : {
18953 0 : for(j1=0; j1<=nc-1; j1++)
18954 : {
18955 0 : cnt.ptr.p_int[j1] = 0;
18956 : }
18957 0 : for(j1=j; j1<=j+cursizes.ptr.p_int[i]-1; j1++)
18958 : {
18959 0 : cnt.ptr.p_int[c->ptr.p_int[j1]] = cnt.ptr.p_int[c->ptr.p_int[j1]]+1;
18960 : }
18961 0 : curcve = curcve+bdss_getcv(&cnt, nc, _state);
18962 0 : j = j+cursizes.ptr.p_int[i];
18963 : }
18964 :
18965 : /*
18966 : * Choose best variant
18967 : */
18968 0 : if( ae_fp_less(curcve,bestcve) )
18969 : {
18970 0 : for(i=0; i<=k-1; i++)
18971 : {
18972 0 : bestsizes.ptr.p_int[i] = cursizes.ptr.p_int[i];
18973 : }
18974 0 : bestcve = curcve;
18975 0 : bestk = k;
18976 : }
18977 : }
18978 :
18979 : /*
18980 : * Transform from sizes to thresholds
18981 : */
18982 0 : *cve = bestcve;
18983 0 : *ni = bestk;
18984 0 : ae_vector_set_length(thresholds, *ni-2+1, _state);
18985 0 : j = bestsizes.ptr.p_int[0];
18986 0 : for(i=1; i<=bestk-1; i++)
18987 : {
18988 0 : thresholds->ptr.p_double[i-1] = 0.5*(a->ptr.p_double[j-1]+a->ptr.p_double[j]);
18989 0 : j = j+bestsizes.ptr.p_int[i];
18990 : }
18991 0 : ae_frame_leave(_state);
18992 : }
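/*************************************************************************
Usage sketch for DSSplitK above (hypothetical driver code, not part of
the ALGLIB sources): splits one attribute into at most KMax intervals
using the greedy procedure.
*************************************************************************/
static void example_dssplitk(ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector a;
    ae_vector c;
    ae_vector thresholds;
    ae_int_t n;
    ae_int_t i;
    ae_int_t info;
    ae_int_t ni;
    double cve;

    ae_frame_make(_state, &_frame_block);
    n = 9;
    ae_vector_init(&a, n, DT_REAL, _state, ae_true);
    ae_vector_init(&c, n, DT_INT, _state, ae_true);
    ae_vector_init(&thresholds, 0, DT_REAL, _state, ae_true);

    /* three pure runs of labels: 0,0,0,1,1,1,2,2,2 */
    for(i=0; i<=n-1; i++)
    {
        a.ptr.p_double[i] = (double)(i);
        c.ptr.p_int[i] = i/3;
    }
    dssplitk(&a, &c, n, 3, 4, &info, &thresholds, &ni, &cve, _state);

    /* on success Info>0, NI is the number of intervals and
       Thresholds[0..NI-2] are the interval boundaries */
    ae_frame_leave(_state);
}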
18993 :
18994 :
18995 : /*************************************************************************
18996 : Automatic optimal discretization, internal subroutine.
18997 :
18998 : -- ALGLIB --
18999 : Copyright 22.05.2008 by Bochkanov Sergey
19000 : *************************************************************************/
19001 0 : void dsoptimalsplitk(/* Real */ ae_vector* a,
19002 : /* Integer */ ae_vector* c,
19003 : ae_int_t n,
19004 : ae_int_t nc,
19005 : ae_int_t kmax,
19006 : ae_int_t* info,
19007 : /* Real */ ae_vector* thresholds,
19008 : ae_int_t* ni,
19009 : double* cve,
19010 : ae_state *_state)
19011 : {
19012 : ae_frame _frame_block;
19013 : ae_vector _a;
19014 : ae_vector _c;
19015 : ae_int_t i;
19016 : ae_int_t j;
19017 : ae_int_t s;
19018 : ae_int_t jl;
19019 : ae_int_t jr;
19020 : double v2;
19021 : ae_vector ties;
19022 : ae_int_t tiecount;
19023 : ae_vector p1;
19024 : ae_vector p2;
19025 : double cvtemp;
19026 : ae_vector cnt;
19027 : ae_vector cnt2;
19028 : ae_matrix cv;
19029 : ae_matrix splits;
19030 : ae_int_t k;
19031 : ae_int_t koptimal;
19032 : double cvoptimal;
19033 :
19034 0 : ae_frame_make(_state, &_frame_block);
19035 0 : memset(&_a, 0, sizeof(_a));
19036 0 : memset(&_c, 0, sizeof(_c));
19037 0 : memset(&ties, 0, sizeof(ties));
19038 0 : memset(&p1, 0, sizeof(p1));
19039 0 : memset(&p2, 0, sizeof(p2));
19040 0 : memset(&cnt, 0, sizeof(cnt));
19041 0 : memset(&cnt2, 0, sizeof(cnt2));
19042 0 : memset(&cv, 0, sizeof(cv));
19043 0 : memset(&splits, 0, sizeof(splits));
19044 0 : ae_vector_init_copy(&_a, a, _state, ae_true);
19045 0 : a = &_a;
19046 0 : ae_vector_init_copy(&_c, c, _state, ae_true);
19047 0 : c = &_c;
19048 0 : *info = 0;
19049 0 : ae_vector_clear(thresholds);
19050 0 : *ni = 0;
19051 0 : *cve = 0;
19052 0 : ae_vector_init(&ties, 0, DT_INT, _state, ae_true);
19053 0 : ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
19054 0 : ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
19055 0 : ae_vector_init(&cnt, 0, DT_INT, _state, ae_true);
19056 0 : ae_vector_init(&cnt2, 0, DT_INT, _state, ae_true);
19057 0 : ae_matrix_init(&cv, 0, 0, DT_REAL, _state, ae_true);
19058 0 : ae_matrix_init(&splits, 0, 0, DT_INT, _state, ae_true);
19059 :
19060 :
19061 : /*
19062 : * Test for errors in inputs
19063 : */
19064 0 : if( (n<=0||nc<2)||kmax<2 )
19065 : {
19066 0 : *info = -1;
19067 0 : ae_frame_leave(_state);
19068 0 : return;
19069 : }
19070 0 : for(i=0; i<=n-1; i++)
19071 : {
19072 0 : if( c->ptr.p_int[i]<0||c->ptr.p_int[i]>=nc )
19073 : {
19074 0 : *info = -2;
19075 0 : ae_frame_leave(_state);
19076 0 : return;
19077 : }
19078 : }
19079 0 : *info = 1;
19080 :
19081 : /*
19082 : * Tie
19083 : */
19084 0 : dstie(a, n, &ties, &tiecount, &p1, &p2, _state);
19085 0 : for(i=0; i<=n-1; i++)
19086 : {
19087 0 : if( p2.ptr.p_int[i]!=i )
19088 : {
19089 0 : k = c->ptr.p_int[i];
19090 0 : c->ptr.p_int[i] = c->ptr.p_int[p2.ptr.p_int[i]];
19091 0 : c->ptr.p_int[p2.ptr.p_int[i]] = k;
19092 : }
19093 : }
19094 :
19095 : /*
19096 : * Special cases
19097 : */
19098 0 : if( tiecount==1 )
19099 : {
19100 0 : *info = -3;
19101 0 : ae_frame_leave(_state);
19102 0 : return;
19103 : }
19104 :
19105 : /*
19106 : * General case
19107 : * Use dynamic programming to find best split in O(KMax*NC*TieCount^2) time
19108 : */
19109 0 : kmax = ae_minint(kmax, tiecount, _state);
19110 0 : ae_matrix_set_length(&cv, kmax-1+1, tiecount-1+1, _state);
19111 0 : ae_matrix_set_length(&splits, kmax-1+1, tiecount-1+1, _state);
19112 0 : ae_vector_set_length(&cnt, nc-1+1, _state);
19113 0 : ae_vector_set_length(&cnt2, nc-1+1, _state);
19114 0 : for(j=0; j<=nc-1; j++)
19115 : {
19116 0 : cnt.ptr.p_int[j] = 0;
19117 : }
19118 0 : for(j=0; j<=tiecount-1; j++)
19119 : {
19120 0 : bdss_tieaddc(c, &ties, j, nc, &cnt, _state);
19121 0 : splits.ptr.pp_int[0][j] = 0;
19122 0 : cv.ptr.pp_double[0][j] = bdss_getcv(&cnt, nc, _state);
19123 : }
19124 0 : for(k=1; k<=kmax-1; k++)
19125 : {
19126 0 : for(j=0; j<=nc-1; j++)
19127 : {
19128 0 : cnt.ptr.p_int[j] = 0;
19129 : }
19130 :
19131 : /*
19132 : * Subtask size J in [K..TieCount-1]:
19133 : * optimal K-splitting on ties from 0-th to J-th.
19134 : */
19135 0 : for(j=k; j<=tiecount-1; j++)
19136 : {
19137 :
19138 : /*
19139 : * Update Cnt - let it contain classes of ties from K-th to J-th
19140 : */
19141 0 : bdss_tieaddc(c, &ties, j, nc, &cnt, _state);
19142 :
19143 : /*
19144 : * Search for optimal split point S in [K..J]
19145 : */
19146 0 : for(i=0; i<=nc-1; i++)
19147 : {
19148 0 : cnt2.ptr.p_int[i] = cnt.ptr.p_int[i];
19149 : }
19150 0 : cv.ptr.pp_double[k][j] = cv.ptr.pp_double[k-1][j-1]+bdss_getcv(&cnt2, nc, _state);
19151 0 : splits.ptr.pp_int[k][j] = j;
19152 0 : for(s=k+1; s<=j; s++)
19153 : {
19154 :
19155 : /*
19156 : * Update Cnt2 - let it contain classes of ties from S-th to J-th
19157 : */
19158 0 : bdss_tiesubc(c, &ties, s-1, nc, &cnt2, _state);
19159 :
19160 : /*
19161 : * Calculate CVE
19162 : */
19163 0 : cvtemp = cv.ptr.pp_double[k-1][s-1]+bdss_getcv(&cnt2, nc, _state);
19164 0 : if( ae_fp_less(cvtemp,cv.ptr.pp_double[k][j]) )
19165 : {
19166 0 : cv.ptr.pp_double[k][j] = cvtemp;
19167 0 : splits.ptr.pp_int[k][j] = s;
19168 : }
19169 : }
19170 : }
19171 : }
19172 :
19173 : /*
19174 : * Choose best partition, output result
19175 : */
19176 0 : koptimal = -1;
19177 0 : cvoptimal = ae_maxrealnumber;
19178 0 : for(k=0; k<=kmax-1; k++)
19179 : {
19180 0 : if( ae_fp_less(cv.ptr.pp_double[k][tiecount-1],cvoptimal) )
19181 : {
19182 0 : cvoptimal = cv.ptr.pp_double[k][tiecount-1];
19183 0 : koptimal = k;
19184 : }
19185 : }
19186 0 : ae_assert(koptimal>=0, "DSOptimalSplitK: internal error #1!", _state);
19187 0 : if( koptimal==0 )
19188 : {
19189 :
19190 : /*
19191 : * Special case: best partition is one big interval.
19192 : * Even 2-partition is not better.
19193 : * This is possible when dealing with "weak" predictor variables.
19194 : *
19195 : * Make binary split as close to the median as possible.
19196 : */
19197 0 : v2 = ae_maxrealnumber;
19198 0 : j = -1;
19199 0 : for(i=1; i<=tiecount-1; i++)
19200 : {
19201 0 : if( ae_fp_less(ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state),v2) )
19202 : {
19203 0 : v2 = ae_fabs(ties.ptr.p_int[i]-0.5*(n-1), _state);
19204 0 : j = i;
19205 : }
19206 : }
19207 0 : ae_assert(j>0, "DSOptimalSplitK: internal error #2!", _state);
19208 0 : ae_vector_set_length(thresholds, 0+1, _state);
19209 0 : thresholds->ptr.p_double[0] = 0.5*(a->ptr.p_double[ties.ptr.p_int[j-1]]+a->ptr.p_double[ties.ptr.p_int[j]]);
19210 0 : *ni = 2;
19211 0 : *cve = (double)(0);
19212 0 : for(i=0; i<=nc-1; i++)
19213 : {
19214 0 : cnt.ptr.p_int[i] = 0;
19215 : }
19216 0 : for(i=0; i<=j-1; i++)
19217 : {
19218 0 : bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
19219 : }
19220 0 : *cve = *cve+bdss_getcv(&cnt, nc, _state);
19221 0 : for(i=0; i<=nc-1; i++)
19222 : {
19223 0 : cnt.ptr.p_int[i] = 0;
19224 : }
19225 0 : for(i=j; i<=tiecount-1; i++)
19226 : {
19227 0 : bdss_tieaddc(c, &ties, i, nc, &cnt, _state);
19228 : }
19229 0 : *cve = *cve+bdss_getcv(&cnt, nc, _state);
19230 : }
19231 : else
19232 : {
19233 :
19234 : /*
19235 : * General case: 2 or more intervals
19236 : *
19237 : * NOTE: we initialize both JL and JR (left and right bounds),
19238 :      * although the algorithm needs only JL.
19239 : */
19240 0 : ae_vector_set_length(thresholds, koptimal-1+1, _state);
19241 0 : *ni = koptimal+1;
19242 0 : *cve = cv.ptr.pp_double[koptimal][tiecount-1];
19243 0 : jl = splits.ptr.pp_int[koptimal][tiecount-1];
19244 0 : jr = tiecount-1;
19245 0 : for(k=koptimal; k>=1; k--)
19246 : {
19247 0 : thresholds->ptr.p_double[k-1] = 0.5*(a->ptr.p_double[ties.ptr.p_int[jl-1]]+a->ptr.p_double[ties.ptr.p_int[jl]]);
19248 0 : jr = jl-1;
19249 0 : jl = splits.ptr.pp_int[k-1][jl-1];
19250 : }
19251 0 : touchint(&jr, _state);
19252 : }
19253 0 : ae_frame_leave(_state);
19254 : }
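/*************************************************************************
A compact restatement of the dynamic program implemented above (it adds
no functionality; CVE(s..j) denotes the BDSS_GetCV() value for the pooled
class counts of ties s..j):

    CV[0][j] = CVE(0..j),                                 j=0..TieCount-1
    CV[k][j] = min{ CV[k-1][s-1] + CVE(s..j) : s=k..j },  k>=1

Splits[k][j] records the minimizing S. The best interval count is the K
which minimizes CV[k][TieCount-1], and the thresholds are recovered by
backtracking through Splits[] (the JL/JR loop above).
*************************************************************************/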
19255 :
19256 :
19257 : /*************************************************************************
19258 : Internal function: returns X*ln(Y), using the convention that the result is 0 when X=0
19259 : *************************************************************************/
19260 0 : static double bdss_xlny(double x, double y, ae_state *_state)
19261 : {
19262 : double result;
19263 :
19264 :
19265 0 : if( ae_fp_eq(x,(double)(0)) )
19266 : {
19267 0 : result = (double)(0);
19268 : }
19269 : else
19270 : {
19271 0 : result = x*ae_log(y, _state);
19272 : }
19273 0 : return result;
19274 : }
19275 :
19276 :
19277 : /*************************************************************************
19278 : Internal function, returns cross-validation error estimate for one bin with
19279 : class counts Cnt[0..NC-1]: CV = -SUM(i, Cnt[i]*ln(Cnt[i]/(S+NC-1))), S = SUM(i, Cnt[i])
19280 : *************************************************************************/
19281 0 : static double bdss_getcv(/* Integer */ ae_vector* cnt,
19282 : ae_int_t nc,
19283 : ae_state *_state)
19284 : {
19285 : ae_int_t i;
19286 : double s;
19287 : double result;
19288 :
19289 :
19290 0 : s = (double)(0);
19291 0 : for(i=0; i<=nc-1; i++)
19292 : {
19293 0 : s = s+cnt->ptr.p_int[i];
19294 : }
19295 0 : result = (double)(0);
19296 0 : for(i=0; i<=nc-1; i++)
19297 : {
19298 0 : result = result-bdss_xlny((double)(cnt->ptr.p_int[i]), cnt->ptr.p_int[i]/(s+nc-1), _state);
19299 : }
19300 0 : return result;
19301 : }
19302 :
19303 :
19304 : /*************************************************************************
19305 : Internal function, adds number of samples of class I in tie NTie to Cnt[I]
19306 : *************************************************************************/
19307 0 : static void bdss_tieaddc(/* Integer */ ae_vector* c,
19308 : /* Integer */ ae_vector* ties,
19309 : ae_int_t ntie,
19310 : ae_int_t nc,
19311 : /* Integer */ ae_vector* cnt,
19312 : ae_state *_state)
19313 : {
19314 : ae_int_t i;
19315 :
19316 :
19317 0 : for(i=ties->ptr.p_int[ntie]; i<=ties->ptr.p_int[ntie+1]-1; i++)
19318 : {
19319 0 : cnt->ptr.p_int[c->ptr.p_int[i]] = cnt->ptr.p_int[c->ptr.p_int[i]]+1;
19320 : }
19321 0 : }
19322 :
19323 :
19324 : /*************************************************************************
19325 : Internal function, subtracts number of samples of class I in tie NTie from Cnt[I]
19326 : *************************************************************************/
19327 0 : static void bdss_tiesubc(/* Integer */ ae_vector* c,
19328 : /* Integer */ ae_vector* ties,
19329 : ae_int_t ntie,
19330 : ae_int_t nc,
19331 : /* Integer */ ae_vector* cnt,
19332 : ae_state *_state)
19333 : {
19334 : ae_int_t i;
19335 :
19336 :
19337 0 : for(i=ties->ptr.p_int[ntie]; i<=ties->ptr.p_int[ntie+1]-1; i++)
19338 : {
19339 0 : cnt->ptr.p_int[c->ptr.p_int[i]] = cnt->ptr.p_int[c->ptr.p_int[i]]-1;
19340 : }
19341 0 : }
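/*************************************************************************
In both helpers above Ties[] stores tie boundaries: tie number NTie
occupies positions Ties[NTie]..Ties[NTie+1]-1 of the sorted dataset, so
calling BDSS_TieAddC() and then BDSS_TieSubC() with the same NTie leaves
Cnt[] unchanged.
*************************************************************************/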
19342 :
19343 :
19344 0 : void _cvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
19345 : {
19346 0 : cvreport *p = (cvreport*)_p;
19347 0 : ae_touch_ptr((void*)p);
19348 0 : }
19349 :
19350 :
19351 0 : void _cvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
19352 : {
19353 0 : cvreport *dst = (cvreport*)_dst;
19354 0 : cvreport *src = (cvreport*)_src;
19355 0 : dst->relclserror = src->relclserror;
19356 0 : dst->avgce = src->avgce;
19357 0 : dst->rmserror = src->rmserror;
19358 0 : dst->avgerror = src->avgerror;
19359 0 : dst->avgrelerror = src->avgrelerror;
19360 0 : }
19361 :
19362 :
19363 0 : void _cvreport_clear(void* _p)
19364 : {
19365 0 : cvreport *p = (cvreport*)_p;
19366 0 : ae_touch_ptr((void*)p);
19367 0 : }
19368 :
19369 :
19370 0 : void _cvreport_destroy(void* _p)
19371 : {
19372 0 : cvreport *p = (cvreport*)_p;
19373 0 : ae_touch_ptr((void*)p);
19374 0 : }
19375 :
19376 :
19377 : #endif
19378 : #if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
19379 :
19380 :
19381 : /*************************************************************************
19382 : This function returns the number of weight updates which is required for
19383 : a gradient calculation problem to be split into subproblems.
19384 : *************************************************************************/
19385 0 : ae_int_t mlpgradsplitcost(ae_state *_state)
19386 : {
19387 : ae_int_t result;
19388 :
19389 :
19390 0 : result = mlpbase_gradbasecasecost;
19391 0 : return result;
19392 : }
19393 :
19394 :
19395 : /*************************************************************************
19396 : This function returns the number of elements in a dataset subset which is
19397 : required for a gradient calculation problem to be split into subproblems.
19398 : *************************************************************************/
19399 0 : ae_int_t mlpgradsplitsize(ae_state *_state)
19400 : {
19401 : ae_int_t result;
19402 :
19403 :
19404 0 : result = mlpbase_microbatchsize;
19405 0 : return result;
19406 : }
19407 :
19408 :
19409 : /*************************************************************************
19410 : Creates neural network with NIn inputs, NOut outputs, without hidden
19411 : layers, with linear output layer. Network weights are filled with small
19412 : random values.
19413 :
19414 : -- ALGLIB --
19415 : Copyright 04.11.2007 by Bochkanov Sergey
19416 : *************************************************************************/
19417 0 : void mlpcreate0(ae_int_t nin,
19418 : ae_int_t nout,
19419 : multilayerperceptron* network,
19420 : ae_state *_state)
19421 : {
19422 : ae_frame _frame_block;
19423 : ae_vector lsizes;
19424 : ae_vector ltypes;
19425 : ae_vector lconnfirst;
19426 : ae_vector lconnlast;
19427 : ae_int_t layerscount;
19428 : ae_int_t lastproc;
19429 :
19430 0 : ae_frame_make(_state, &_frame_block);
19431 0 : memset(&lsizes, 0, sizeof(lsizes));
19432 0 : memset(&ltypes, 0, sizeof(ltypes));
19433 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19434 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19435 0 : _multilayerperceptron_clear(network);
19436 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19437 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19438 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19439 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19440 :
19441 0 : layerscount = 1+3;
19442 :
19443 : /*
19444 : * Allocate arrays
19445 : */
19446 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19447 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19448 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19449 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19450 :
19451 : /*
19452 : * Layers
19453 : */
19454 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19455 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19456 0 : mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19457 :
19458 : /*
19459 : * Create
19460 : */
19461 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19462 0 : mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_true, _state);
19463 0 : ae_frame_leave(_state);
19464 0 : }
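/*************************************************************************
Usage sketch for the MLPCreate* family (hypothetical driver code, not
part of the ALGLIB sources; it assumes that _multilayerperceptron_init
follows the same _TYPE_init convention as the other structure
initializers in this file).
*************************************************************************/
static void example_mlpcreate(ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron network;

    ae_frame_make(_state, &_frame_block);
    memset(&network, 0, sizeof(network));
    _multilayerperceptron_init(&network, _state, ae_true);

    /* 2 inputs -> 1 linear output, no hidden layers (mlpcreate0 above) */
    mlpcreate0(2, 1, &network, _state);

    /* the same object can be rebuilt with one hidden layer of 5 neurons,
       since each constructor clears the network before building it */
    mlpcreate1(2, 5, 1, &network, _state);
    ae_frame_leave(_state);
}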
19465 :
19466 :
19467 : /*************************************************************************
19468 : Same as MLPCreate0, but with one hidden layer (NHid neurons) with
19469 : non-linear activation function. Output layer is linear.
19470 :
19471 : -- ALGLIB --
19472 : Copyright 04.11.2007 by Bochkanov Sergey
19473 : *************************************************************************/
19474 0 : void mlpcreate1(ae_int_t nin,
19475 : ae_int_t nhid,
19476 : ae_int_t nout,
19477 : multilayerperceptron* network,
19478 : ae_state *_state)
19479 : {
19480 : ae_frame _frame_block;
19481 : ae_vector lsizes;
19482 : ae_vector ltypes;
19483 : ae_vector lconnfirst;
19484 : ae_vector lconnlast;
19485 : ae_int_t layerscount;
19486 : ae_int_t lastproc;
19487 :
19488 0 : ae_frame_make(_state, &_frame_block);
19489 0 : memset(&lsizes, 0, sizeof(lsizes));
19490 0 : memset(&ltypes, 0, sizeof(ltypes));
19491 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19492 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19493 0 : _multilayerperceptron_clear(network);
19494 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19495 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19496 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19497 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19498 :
19499 0 : layerscount = 1+3+3;
19500 :
19501 : /*
19502 : * Allocate arrays
19503 : */
19504 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19505 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19506 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19507 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19508 :
19509 : /*
19510 : * Layers
19511 : */
19512 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19513 0 : mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19514 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19515 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19516 0 : mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19517 :
19518 : /*
19519 : * Create
19520 : */
19521 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19522 0 : mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_true, _state);
19523 0 : ae_frame_leave(_state);
19524 0 : }
19525 :
19526 :
19527 : /*************************************************************************
19528 : Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons)
19529 : with non-linear activation function. Output layer is linear.
19531 :
19532 : -- ALGLIB --
19533 : Copyright 04.11.2007 by Bochkanov Sergey
19534 : *************************************************************************/
19535 0 : void mlpcreate2(ae_int_t nin,
19536 : ae_int_t nhid1,
19537 : ae_int_t nhid2,
19538 : ae_int_t nout,
19539 : multilayerperceptron* network,
19540 : ae_state *_state)
19541 : {
19542 : ae_frame _frame_block;
19543 : ae_vector lsizes;
19544 : ae_vector ltypes;
19545 : ae_vector lconnfirst;
19546 : ae_vector lconnlast;
19547 : ae_int_t layerscount;
19548 : ae_int_t lastproc;
19549 :
19550 0 : ae_frame_make(_state, &_frame_block);
19551 0 : memset(&lsizes, 0, sizeof(lsizes));
19552 0 : memset(&ltypes, 0, sizeof(ltypes));
19553 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19554 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19555 0 : _multilayerperceptron_clear(network);
19556 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19557 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19558 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19559 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19560 :
19561 0 : layerscount = 1+3+3+3;
19562 :
19563 : /*
19564 : * Allocate arrays
19565 : */
19566 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19567 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19568 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19569 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19570 :
19571 : /*
19572 : * Layers
19573 : */
19574 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19575 0 : mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19576 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19577 0 : mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19578 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19579 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19580 0 : mlpbase_addactivationlayer(-5, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19581 :
19582 : /*
19583 : * Create
19584 : */
19585 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19586 0 : mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_true, _state);
19587 0 : ae_frame_leave(_state);
19588 0 : }
19589 :
19590 :
19591 : /*************************************************************************
19592 : Creates neural network with NIn inputs, NOut outputs, without hidden
19593 : layers, with a non-linear output layer. Network weights are filled with small
19594 : random values.
19595 :
19596 : Activation function of the output layer takes values:
19597 :
19598 : (B, +INF), if D>=0
19599 :
19600 : or
19601 :
19602 : (-INF, B), if D<0.
19603 :
19604 :
19605 : -- ALGLIB --
19606 : Copyright 30.03.2008 by Bochkanov Sergey
19607 : *************************************************************************/
19608 0 : void mlpcreateb0(ae_int_t nin,
19609 : ae_int_t nout,
19610 : double b,
19611 : double d,
19612 : multilayerperceptron* network,
19613 : ae_state *_state)
19614 : {
19615 : ae_frame _frame_block;
19616 : ae_vector lsizes;
19617 : ae_vector ltypes;
19618 : ae_vector lconnfirst;
19619 : ae_vector lconnlast;
19620 : ae_int_t layerscount;
19621 : ae_int_t lastproc;
19622 : ae_int_t i;
19623 :
19624 0 : ae_frame_make(_state, &_frame_block);
19625 0 : memset(&lsizes, 0, sizeof(lsizes));
19626 0 : memset(&ltypes, 0, sizeof(ltypes));
19627 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19628 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19629 0 : _multilayerperceptron_clear(network);
19630 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19631 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19632 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19633 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19634 :
19635 0 : layerscount = 1+3;
19636 0 : if( ae_fp_greater_eq(d,(double)(0)) )
19637 : {
19638 0 : d = (double)(1);
19639 : }
19640 : else
19641 : {
19642 0 : d = (double)(-1);
19643 : }
19644 :
19645 : /*
19646 : * Allocate arrays
19647 : */
19648 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19649 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19650 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19651 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19652 :
19653 : /*
19654 : * Layers
19655 : */
19656 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19657 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19658 0 : mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19659 :
19660 : /*
19661 : * Create
19662 : */
19663 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19664 0 : mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_false, _state);
19665 :
19666 : /*
19667 :      * Turn on outputs shift/scaling.
19668 : */
19669 0 : for(i=nin; i<=nin+nout-1; i++)
19670 : {
19671 0 : network->columnmeans.ptr.p_double[i] = b;
19672 0 : network->columnsigmas.ptr.p_double[i] = d;
19673 : }
19674 0 : ae_frame_leave(_state);
19675 0 : }
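/*************************************************************************
How the (B,+INF)/(-INF,B) bound stated above is realized: the output
post-processing stage computes Out[i] = ColumnSigmas[i]*Raw[i] +
ColumnMeans[i], and the loop above sets ColumnMeans[i]=B and
ColumnSigmas[i]=+1 or -1 according to sign(D). Combined with a
positive-valued output activation (type 3) this yields values above B
for D>=0 and below B for D<0.
*************************************************************************/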
19676 :
19677 :
19678 : /*************************************************************************
19679 : Same as MLPCreateB0 but with non-linear hidden layer.
19680 :
19681 : -- ALGLIB --
19682 : Copyright 30.03.2008 by Bochkanov Sergey
19683 : *************************************************************************/
19684 0 : void mlpcreateb1(ae_int_t nin,
19685 : ae_int_t nhid,
19686 : ae_int_t nout,
19687 : double b,
19688 : double d,
19689 : multilayerperceptron* network,
19690 : ae_state *_state)
19691 : {
19692 : ae_frame _frame_block;
19693 : ae_vector lsizes;
19694 : ae_vector ltypes;
19695 : ae_vector lconnfirst;
19696 : ae_vector lconnlast;
19697 : ae_int_t layerscount;
19698 : ae_int_t lastproc;
19699 : ae_int_t i;
19700 :
19701 0 : ae_frame_make(_state, &_frame_block);
19702 0 : memset(&lsizes, 0, sizeof(lsizes));
19703 0 : memset(&ltypes, 0, sizeof(ltypes));
19704 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19705 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19706 0 : _multilayerperceptron_clear(network);
19707 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19708 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19709 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19710 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19711 :
19712 0 : layerscount = 1+3+3;
19713 0 : if( ae_fp_greater_eq(d,(double)(0)) )
19714 : {
19715 0 : d = (double)(1);
19716 : }
19717 : else
19718 : {
19719 0 : d = (double)(-1);
19720 : }
19721 :
19722 : /*
19723 : * Allocate arrays
19724 : */
19725 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19726 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19727 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19728 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19729 :
19730 : /*
19731 : * Layers
19732 : */
19733 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19734 0 : mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19735 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19736 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19737 0 : mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19738 :
19739 : /*
19740 : * Create
19741 : */
19742 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19743 0 : mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_false, _state);
19744 :
19745 : /*
19746 :      * Turn on outputs shift/scaling.
19747 : */
19748 0 : for(i=nin; i<=nin+nout-1; i++)
19749 : {
19750 0 : network->columnmeans.ptr.p_double[i] = b;
19751 0 : network->columnsigmas.ptr.p_double[i] = d;
19752 : }
19753 0 : ae_frame_leave(_state);
19754 0 : }
19755 :
19756 :
19757 : /*************************************************************************
19758 : Same as MLPCreateB0 but with two non-linear hidden layers.
19759 :
19760 : -- ALGLIB --
19761 : Copyright 30.03.2008 by Bochkanov Sergey
19762 : *************************************************************************/
19763 0 : void mlpcreateb2(ae_int_t nin,
19764 : ae_int_t nhid1,
19765 : ae_int_t nhid2,
19766 : ae_int_t nout,
19767 : double b,
19768 : double d,
19769 : multilayerperceptron* network,
19770 : ae_state *_state)
19771 : {
19772 : ae_frame _frame_block;
19773 : ae_vector lsizes;
19774 : ae_vector ltypes;
19775 : ae_vector lconnfirst;
19776 : ae_vector lconnlast;
19777 : ae_int_t layerscount;
19778 : ae_int_t lastproc;
19779 : ae_int_t i;
19780 :
19781 0 : ae_frame_make(_state, &_frame_block);
19782 0 : memset(&lsizes, 0, sizeof(lsizes));
19783 0 : memset(&ltypes, 0, sizeof(ltypes));
19784 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19785 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19786 0 : _multilayerperceptron_clear(network);
19787 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19788 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19789 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19790 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19791 :
19792 0 : layerscount = 1+3+3+3;
19793 0 : if( ae_fp_greater_eq(d,(double)(0)) )
19794 : {
19795 0 : d = (double)(1);
19796 : }
19797 : else
19798 : {
19799 0 : d = (double)(-1);
19800 : }
19801 :
19802 : /*
19803 : * Allocate arrays
19804 : */
19805 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19806 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19807 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19808 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19809 :
19810 : /*
19811 : * Layers
19812 : */
19813 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19814 0 : mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19815 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19816 0 : mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19817 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19818 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19819 0 : mlpbase_addactivationlayer(3, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19820 :
19821 : /*
19822 : * Create
19823 : */
19824 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19825 0 : mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_false, _state);
19826 :
19827 : /*
19828 :      * Turn on outputs shift/scaling.
19829 : */
19830 0 : for(i=nin; i<=nin+nout-1; i++)
19831 : {
19832 0 : network->columnmeans.ptr.p_double[i] = b;
19833 0 : network->columnsigmas.ptr.p_double[i] = d;
19834 : }
19835 0 : ae_frame_leave(_state);
19836 0 : }
19837 :
19838 :
19839 : /*************************************************************************
19840 : Creates neural network with NIn inputs, NOut outputs, without hidden
19841 : layers, with a non-linear output layer. Network weights are filled with small
19842 : random values. Activation function of the output layer takes values [A,B].
19843 :
19844 : -- ALGLIB --
19845 : Copyright 30.03.2008 by Bochkanov Sergey
19846 : *************************************************************************/
19847 0 : void mlpcreater0(ae_int_t nin,
19848 : ae_int_t nout,
19849 : double a,
19850 : double b,
19851 : multilayerperceptron* network,
19852 : ae_state *_state)
19853 : {
19854 : ae_frame _frame_block;
19855 : ae_vector lsizes;
19856 : ae_vector ltypes;
19857 : ae_vector lconnfirst;
19858 : ae_vector lconnlast;
19859 : ae_int_t layerscount;
19860 : ae_int_t lastproc;
19861 : ae_int_t i;
19862 :
19863 0 : ae_frame_make(_state, &_frame_block);
19864 0 : memset(&lsizes, 0, sizeof(lsizes));
19865 0 : memset(&ltypes, 0, sizeof(ltypes));
19866 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19867 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19868 0 : _multilayerperceptron_clear(network);
19869 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19870 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19871 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19872 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19873 :
19874 0 : layerscount = 1+3;
19875 :
19876 : /*
19877 : * Allocate arrays
19878 : */
19879 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19880 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19881 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19882 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19883 :
19884 : /*
19885 : * Layers
19886 : */
19887 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19888 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19889 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19890 :
19891 : /*
19892 : * Create
19893 : */
19894 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19895 0 : mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_false, ae_false, _state);
19896 :
19897 : /*
19898 : * Turn on outputs shift/scaling.
19899 : */
19900 0 : for(i=nin; i<=nin+nout-1; i++)
19901 : {
19902 0 : network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
19903 0 : network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
19904 : }
19905 0 : ae_frame_leave(_state);
19906 0 : }
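/*************************************************************************
Why the loop above produces outputs in [A,B]: with ColumnMeans[i] =
0.5*(A+B), ColumnSigmas[i] = 0.5*(A-B) and an output activation taking
values t in [-1,1] (type 1), the post-processed output is

    Out = 0.5*(A+B) + 0.5*(A-B)*t

which equals A at t=1 and B at t=-1, sweeping exactly the interval
between A and B.
*************************************************************************/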
19907 :
19908 :
19909 : /*************************************************************************
19910 : Same as MLPCreateR0, but with non-linear hidden layer.
19911 :
19912 : -- ALGLIB --
19913 : Copyright 30.03.2008 by Bochkanov Sergey
19914 : *************************************************************************/
19915 0 : void mlpcreater1(ae_int_t nin,
19916 : ae_int_t nhid,
19917 : ae_int_t nout,
19918 : double a,
19919 : double b,
19920 : multilayerperceptron* network,
19921 : ae_state *_state)
19922 : {
19923 : ae_frame _frame_block;
19924 : ae_vector lsizes;
19925 : ae_vector ltypes;
19926 : ae_vector lconnfirst;
19927 : ae_vector lconnlast;
19928 : ae_int_t layerscount;
19929 : ae_int_t lastproc;
19930 : ae_int_t i;
19931 :
19932 0 : ae_frame_make(_state, &_frame_block);
19933 0 : memset(&lsizes, 0, sizeof(lsizes));
19934 0 : memset(&ltypes, 0, sizeof(ltypes));
19935 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
19936 0 : memset(&lconnlast, 0, sizeof(lconnlast));
19937 0 : _multilayerperceptron_clear(network);
19938 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
19939 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
19940 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
19941 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
19942 :
19943 0 : layerscount = 1+3+3;
19944 :
19945 : /*
19946 : * Allocate arrays
19947 : */
19948 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
19949 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
19950 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
19951 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
19952 :
19953 : /*
19954 : * Layers
19955 : */
19956 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19957 0 : mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19958 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19959 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19960 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
19961 :
19962 : /*
19963 : * Create
19964 : */
19965 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
19966 0 : mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_false, ae_false, _state);
19967 :
19968 : /*
19969 : * Turn on outputs shift/scaling.
19970 : */
19971 0 : for(i=nin; i<=nin+nout-1; i++)
19972 : {
19973 0 : network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
19974 0 : network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
19975 : }
19976 0 : ae_frame_leave(_state);
19977 0 : }
19978 :
19979 :
19980 : /*************************************************************************
19981 : Same as MLPCreateR0, but with two non-linear hidden layers.
19982 :
19983 : -- ALGLIB --
19984 : Copyright 30.03.2008 by Bochkanov Sergey
19985 : *************************************************************************/
19986 0 : void mlpcreater2(ae_int_t nin,
19987 : ae_int_t nhid1,
19988 : ae_int_t nhid2,
19989 : ae_int_t nout,
19990 : double a,
19991 : double b,
19992 : multilayerperceptron* network,
19993 : ae_state *_state)
19994 : {
19995 : ae_frame _frame_block;
19996 : ae_vector lsizes;
19997 : ae_vector ltypes;
19998 : ae_vector lconnfirst;
19999 : ae_vector lconnlast;
20000 : ae_int_t layerscount;
20001 : ae_int_t lastproc;
20002 : ae_int_t i;
20003 :
20004 0 : ae_frame_make(_state, &_frame_block);
20005 0 : memset(&lsizes, 0, sizeof(lsizes));
20006 0 : memset(&ltypes, 0, sizeof(ltypes));
20007 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
20008 0 : memset(&lconnlast, 0, sizeof(lconnlast));
20009 0 : _multilayerperceptron_clear(network);
20010 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
20011 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
20012 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
20013 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
20014 :
20015 0 : layerscount = 1+3+3+3;
20016 :
20017 : /*
20018 : * Allocate arrays
20019 : */
20020 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
20021 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
20022 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
20023 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
20024 :
20025 : /*
20026 : * Layers
20027 : */
20028 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20029 0 : mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20030 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20031 0 : mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20032 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20033 0 : mlpbase_addbiasedsummatorlayer(nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20034 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20035 :
20036 : /*
20037 : * Create
20038 : */
20039 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_false, network, _state);
20040 0 : mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_false, ae_false, _state);
20041 :
20042 : /*
20043 : * Turn on outputs shift/scaling.
20044 : */
20045 0 : for(i=nin; i<=nin+nout-1; i++)
20046 : {
20047 0 : network->columnmeans.ptr.p_double[i] = 0.5*(a+b);
20048 0 : network->columnsigmas.ptr.p_double[i] = 0.5*(a-b);
20049 : }
20050 0 : ae_frame_leave(_state);
20051 0 : }
20052 :
20053 :
20054 : /*************************************************************************
20055 : Creates classifier network with NIn inputs and NOut possible classes.
20056 : The network contains no hidden layers and a linear output layer with
20057 : SOFTMAX normalization (so the outputs sum up to 1.0 and converge to
20058 : posterior probabilities).
20059 :
20060 : -- ALGLIB --
20061 : Copyright 04.11.2007 by Bochkanov Sergey
20062 : *************************************************************************/
20063 0 : void mlpcreatec0(ae_int_t nin,
20064 : ae_int_t nout,
20065 : multilayerperceptron* network,
20066 : ae_state *_state)
20067 : {
20068 : ae_frame _frame_block;
20069 : ae_vector lsizes;
20070 : ae_vector ltypes;
20071 : ae_vector lconnfirst;
20072 : ae_vector lconnlast;
20073 : ae_int_t layerscount;
20074 : ae_int_t lastproc;
20075 :
20076 0 : ae_frame_make(_state, &_frame_block);
20077 0 : memset(&lsizes, 0, sizeof(lsizes));
20078 0 : memset(&ltypes, 0, sizeof(ltypes));
20079 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
20080 0 : memset(&lconnlast, 0, sizeof(lconnlast));
20081 0 : _multilayerperceptron_clear(network);
20082 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
20083 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
20084 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
20085 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
20086 :
20087 0 : ae_assert(nout>=2, "MLPCreateC0: NOut<2!", _state);
20088 0 : layerscount = 1+2+1;
20089 :
20090 : /*
20091 : * Allocate arrays
20092 : */
20093 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
20094 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
20095 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
20096 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
20097 :
20098 : /*
20099 : * Layers
20100 : */
20101 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20102 0 : mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20103 0 : mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20104 :
20105 : /*
20106 : * Create
20107 : */
20108 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
20109 0 : mlpbase_fillhighlevelinformation(network, nin, 0, 0, nout, ae_true, ae_true, _state);
20110 0 : ae_frame_leave(_state);
20111 0 : }
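/*************************************************************************
Note on the construction above: the output block is a biased summator
with NOut-1 neurons followed by a "zero layer", i.e. the last logit is
pinned to 0 and SOFTMAX is taken over all NOut logits:

    P(class i) = exp(z_i)/SUM(j=0..NOut-1, exp(z_j)),  with z_(NOut-1)=0

Fixing one logit removes the translation redundancy of SOFTMAX without
restricting the set of representable distributions.
*************************************************************************/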
20112 :
20113 :
20114 : /*************************************************************************
20115 : Same as MLPCreateC0, but with one non-linear hidden layer.
20116 :
20117 : -- ALGLIB --
20118 : Copyright 04.11.2007 by Bochkanov Sergey
20119 : *************************************************************************/
20120 0 : void mlpcreatec1(ae_int_t nin,
20121 : ae_int_t nhid,
20122 : ae_int_t nout,
20123 : multilayerperceptron* network,
20124 : ae_state *_state)
20125 : {
20126 : ae_frame _frame_block;
20127 : ae_vector lsizes;
20128 : ae_vector ltypes;
20129 : ae_vector lconnfirst;
20130 : ae_vector lconnlast;
20131 : ae_int_t layerscount;
20132 : ae_int_t lastproc;
20133 :
20134 0 : ae_frame_make(_state, &_frame_block);
20135 0 : memset(&lsizes, 0, sizeof(lsizes));
20136 0 : memset(&ltypes, 0, sizeof(ltypes));
20137 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
20138 0 : memset(&lconnlast, 0, sizeof(lconnlast));
20139 0 : _multilayerperceptron_clear(network);
20140 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
20141 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
20142 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
20143 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
20144 :
20145 0 : ae_assert(nout>=2, "MLPCreateC1: NOut<2!", _state);
20146 0 : layerscount = 1+3+2+1;
20147 :
20148 : /*
20149 : * Allocate arrays
20150 : */
20151 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
20152 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
20153 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
20154 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
20155 :
20156 : /*
20157 : * Layers
20158 : */
20159 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20160 0 : mlpbase_addbiasedsummatorlayer(nhid, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20161 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20162 0 : mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20163 0 : mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20164 :
20165 : /*
20166 : * Create
20167 : */
20168 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
20169 0 : mlpbase_fillhighlevelinformation(network, nin, nhid, 0, nout, ae_true, ae_true, _state);
20170 0 : ae_frame_leave(_state);
20171 0 : }
20172 :
20173 :
20174 : /*************************************************************************
20175 : Same as MLPCreateC0, but with two non-linear hidden layers.
20176 :
20177 : -- ALGLIB --
20178 : Copyright 04.11.2007 by Bochkanov Sergey
20179 : *************************************************************************/
20180 0 : void mlpcreatec2(ae_int_t nin,
20181 : ae_int_t nhid1,
20182 : ae_int_t nhid2,
20183 : ae_int_t nout,
20184 : multilayerperceptron* network,
20185 : ae_state *_state)
20186 : {
20187 : ae_frame _frame_block;
20188 : ae_vector lsizes;
20189 : ae_vector ltypes;
20190 : ae_vector lconnfirst;
20191 : ae_vector lconnlast;
20192 : ae_int_t layerscount;
20193 : ae_int_t lastproc;
20194 :
20195 0 : ae_frame_make(_state, &_frame_block);
20196 0 : memset(&lsizes, 0, sizeof(lsizes));
20197 0 : memset(&ltypes, 0, sizeof(ltypes));
20198 0 : memset(&lconnfirst, 0, sizeof(lconnfirst));
20199 0 : memset(&lconnlast, 0, sizeof(lconnlast));
20200 0 : _multilayerperceptron_clear(network);
20201 0 : ae_vector_init(&lsizes, 0, DT_INT, _state, ae_true);
20202 0 : ae_vector_init(&ltypes, 0, DT_INT, _state, ae_true);
20203 0 : ae_vector_init(&lconnfirst, 0, DT_INT, _state, ae_true);
20204 0 : ae_vector_init(&lconnlast, 0, DT_INT, _state, ae_true);
20205 :
20206 0 : ae_assert(nout>=2, "MLPCreateC2: NOut<2!", _state);
20207 0 : layerscount = 1+3+3+2+1;
20208 :
20209 : /*
20210 : * Allocate arrays
20211 : */
20212 0 : ae_vector_set_length(&lsizes, layerscount-1+1, _state);
20213 0 : ae_vector_set_length(&ltypes, layerscount-1+1, _state);
20214 0 : ae_vector_set_length(&lconnfirst, layerscount-1+1, _state);
20215 0 : ae_vector_set_length(&lconnlast, layerscount-1+1, _state);
20216 :
20217 : /*
20218 : * Layers
20219 : */
20220 0 : mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20221 0 : mlpbase_addbiasedsummatorlayer(nhid1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20222 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20223 0 : mlpbase_addbiasedsummatorlayer(nhid2, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20224 0 : mlpbase_addactivationlayer(1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20225 0 : mlpbase_addbiasedsummatorlayer(nout-1, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20226 0 : mlpbase_addzerolayer(&lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
20227 :
20228 : /*
20229 : * Create
20230 : */
20231 0 : mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast, layerscount, ae_true, network, _state);
20232 0 : mlpbase_fillhighlevelinformation(network, nin, nhid1, nhid2, nout, ae_true, ae_true, _state);
20233 0 : ae_frame_leave(_state);
20234 0 : }
20235 :
20236 :
20237 : /*************************************************************************
20238 : Copying of neural network
20239 :
20240 : INPUT PARAMETERS:
20241 : Network1 - original
20242 :
20243 : OUTPUT PARAMETERS:
20244 : Network2 - copy
20245 :
20246 : -- ALGLIB --
20247 : Copyright 04.11.2007 by Bochkanov Sergey
20248 : *************************************************************************/
20249 0 : void mlpcopy(multilayerperceptron* network1,
20250 : multilayerperceptron* network2,
20251 : ae_state *_state)
20252 : {
20253 :
20254 0 : _multilayerperceptron_clear(network2);
20255 :
20256 0 : mlpcopyshared(network1, network2, _state);
20257 0 : }
20258 :
20259 :
20260 : /*************************************************************************
20261 : Copying of neural network (second parameter is passed as shared object).
20262 :
20263 : INPUT PARAMETERS:
20264 : Network1 - original
20265 :
20266 : OUTPUT PARAMETERS:
20267 : Network2 - copy
20268 :
20269 : -- ALGLIB --
20270 : Copyright 04.11.2007 by Bochkanov Sergey
20271 : *************************************************************************/
20272 0 : void mlpcopyshared(multilayerperceptron* network1,
20273 : multilayerperceptron* network2,
20274 : ae_state *_state)
20275 : {
20276 : ae_frame _frame_block;
20277 : ae_int_t wcount;
20278 : ae_int_t i;
20279 : mlpbuffers buf;
20280 : smlpgrad sgrad;
20281 :
20282 0 : ae_frame_make(_state, &_frame_block);
20283 0 : memset(&buf, 0, sizeof(buf));
20284 0 : memset(&sgrad, 0, sizeof(sgrad));
20285 0 : _mlpbuffers_init(&buf, _state, ae_true);
20286 0 : _smlpgrad_init(&sgrad, _state, ae_true);
20287 :
20288 :
20289 : /*
20290 : * Copy scalar and array fields
20291 : */
20292 0 : network2->hlnetworktype = network1->hlnetworktype;
20293 0 : network2->hlnormtype = network1->hlnormtype;
20294 0 : copyintegerarray(&network1->hllayersizes, &network2->hllayersizes, _state);
20295 0 : copyintegerarray(&network1->hlconnections, &network2->hlconnections, _state);
20296 0 : copyintegerarray(&network1->hlneurons, &network2->hlneurons, _state);
20297 0 : copyintegerarray(&network1->structinfo, &network2->structinfo, _state);
20298 0 : copyrealarray(&network1->weights, &network2->weights, _state);
20299 0 : copyrealarray(&network1->columnmeans, &network2->columnmeans, _state);
20300 0 : copyrealarray(&network1->columnsigmas, &network2->columnsigmas, _state);
20301 0 : copyrealarray(&network1->neurons, &network2->neurons, _state);
20302 0 : copyrealarray(&network1->dfdnet, &network2->dfdnet, _state);
20303 0 : copyrealarray(&network1->derror, &network2->derror, _state);
20304 0 : copyrealarray(&network1->x, &network2->x, _state);
20305 0 : copyrealarray(&network1->y, &network2->y, _state);
20306 0 : copyrealarray(&network1->nwbuf, &network2->nwbuf, _state);
20307 0 : copyintegerarray(&network1->integerbuf, &network2->integerbuf, _state);
20308 :
20309 : /*
20310 : * copy buffers
20311 : */
20312 0 : wcount = mlpgetweightscount(network1, _state);
20313 0 : ae_shared_pool_set_seed(&network2->buf, &buf, sizeof(buf), _mlpbuffers_init, _mlpbuffers_init_copy, _mlpbuffers_destroy, _state);
20314 0 : ae_vector_set_length(&sgrad.g, wcount, _state);
20315 0 : sgrad.f = 0.0;
20316 0 : for(i=0; i<=wcount-1; i++)
20317 : {
20318 0 : sgrad.g.ptr.p_double[i] = 0.0;
20319 : }
20320 0 : ae_shared_pool_set_seed(&network2->gradbuf, &sgrad, sizeof(sgrad), _smlpgrad_init, _smlpgrad_init_copy, _smlpgrad_destroy, _state);
20321 0 : ae_frame_leave(_state);
20322 0 : }
20323 :
20324 :
20325 : /*************************************************************************
20326 : This function compares architectures of neural networks. Only geometries
20327 : are compared, weights and other parameters are not tested.
20328 :
20329 : -- ALGLIB --
20330 : Copyright 20.06.2013 by Bochkanov Sergey
20331 : *************************************************************************/
20332 0 : ae_bool mlpsamearchitecture(multilayerperceptron* network1,
20333 : multilayerperceptron* network2,
20334 : ae_state *_state)
20335 : {
20336 : ae_int_t i;
20337 : ae_int_t ninfo;
20338 : ae_bool result;
20339 :
20340 :
20341 0 : ae_assert(network1->structinfo.cnt>0&&network1->structinfo.cnt>=network1->structinfo.ptr.p_int[0], "MLPSameArchitecture: Network1 is uninitialized", _state);
20342 0 : ae_assert(network2->structinfo.cnt>0&&network2->structinfo.cnt>=network2->structinfo.ptr.p_int[0], "MLPSameArchitecture: Network2 is uninitialized", _state);
20343 0 : result = ae_false;
20344 0 : if( network1->structinfo.ptr.p_int[0]!=network2->structinfo.ptr.p_int[0] )
20345 : {
20346 0 : return result;
20347 : }
20348 0 : ninfo = network1->structinfo.ptr.p_int[0];
20349 0 : for(i=0; i<=ninfo-1; i++)
20350 : {
20351 0 : if( network1->structinfo.ptr.p_int[i]!=network2->structinfo.ptr.p_int[i] )
20352 : {
20353 0 : return result;
20354 : }
20355 : }
20356 0 : result = ae_true;
20357 0 : return result;
20358 : }
20359 :
20360 :
20361 : /*************************************************************************
20362 : This function copies tunable parameters (weights/means/sigmas) from one
20363 : network to another with the same architecture. It performs rudimentary
20364 : checks that the architectures are the same, and throws an exception if the check fails.
20365 :
20366 : It is intended for fast copying of states between two networks which are
20367 : known to have the same geometry.
20368 :
20369 : INPUT PARAMETERS:
20370 : Network1 - source, must be correctly initialized
20371 : Network2 - target, must have same architecture
20372 :
20373 : OUTPUT PARAMETERS:
20374 : Network2 - network state is copied from source to target
20375 :
20376 : -- ALGLIB --
20377 : Copyright 20.06.2013 by Bochkanov Sergey
20378 : *************************************************************************/
20379 0 : void mlpcopytunableparameters(multilayerperceptron* network1,
20380 : multilayerperceptron* network2,
20381 : ae_state *_state)
20382 : {
20383 : ae_int_t i;
20384 : ae_int_t ninfo;
20385 : ae_int_t nin;
20386 : ae_int_t nout;
20387 : ae_int_t wcount;
20388 :
20389 :
20390 0 : ae_assert(network1->structinfo.cnt>0&&network1->structinfo.cnt>=network1->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network1 is uninitialized", _state);
20391 0 : ae_assert(network2->structinfo.cnt>0&&network2->structinfo.cnt>=network2->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network2 is uninitialized", _state);
20392 0 : ae_assert(network1->structinfo.ptr.p_int[0]==network2->structinfo.ptr.p_int[0], "MLPCopyTunableParameters: Network1 geometry differs from that of Network2", _state);
20393 0 : ninfo = network1->structinfo.ptr.p_int[0];
20394 0 : for(i=0; i<=ninfo-1; i++)
20395 : {
20396 0 : ae_assert(network1->structinfo.ptr.p_int[i]==network2->structinfo.ptr.p_int[i], "MLPCopyTunableParameters: Network1 geometry differs from that of Network2", _state);
20397 : }
20398 0 : mlpproperties(network1, &nin, &nout, &wcount, _state);
20399 0 : for(i=0; i<=wcount-1; i++)
20400 : {
20401 0 : network2->weights.ptr.p_double[i] = network1->weights.ptr.p_double[i];
20402 : }
20403 0 : if( mlpissoftmax(network1, _state) )
20404 : {
20405 0 : for(i=0; i<=nin-1; i++)
20406 : {
20407 0 : network2->columnmeans.ptr.p_double[i] = network1->columnmeans.ptr.p_double[i];
20408 0 : network2->columnsigmas.ptr.p_double[i] = network1->columnsigmas.ptr.p_double[i];
20409 : }
20410 : }
20411 : else
20412 : {
20413 0 : for(i=0; i<=nin+nout-1; i++)
20414 : {
20415 0 : network2->columnmeans.ptr.p_double[i] = network1->columnmeans.ptr.p_double[i];
20416 0 : network2->columnsigmas.ptr.p_double[i] = network1->columnsigmas.ptr.p_double[i];
20417 : }
20418 : }
20419 0 : }
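/*
 * Usage sketch (illustrative only): guard the fast parameter copy with the
 * architecture check defined above, falling back to a full copy otherwise:
 *
 *     if( mlpsamearchitecture(&neta, &netb, &st) )
 *         mlpcopytunableparameters(&neta, &netb, &st);
 *     else
 *         mlpcopy(&neta, &netb, &st);
 */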
20420 :
20421 :
20422 : /*************************************************************************
20423 : This function exports tunable parameters (weights/means/sigmas) from
20424 : the network to a contiguous array. Nothing is guaranteed about the array
20425 : format; the only thing you can count on is that MLPImportTunableParameters()
20426 : will be able to parse it.
20427 :
20428 : It is intended for fast copying of states between a network and a backup array.
20429 :
20430 : INPUT PARAMETERS:
20431 : Network - source, must be correctly initialized
20432 : P - array to use. If its size is enough to store data, it
20433 : is reused.
20434 :
20435 : OUTPUT PARAMETERS:
20436 : P - array which stores network parameters, resized if needed
20437 : PCount - number of parameters stored in array.
20438 :
20439 : -- ALGLIB --
20440 : Copyright 20.06.2013 by Bochkanov Sergey
20441 : *************************************************************************/
20442 0 : void mlpexporttunableparameters(multilayerperceptron* network,
20443 : /* Real */ ae_vector* p,
20444 : ae_int_t* pcount,
20445 : ae_state *_state)
20446 : {
20447 : ae_int_t i;
20448 : ae_int_t k;
20449 : ae_int_t nin;
20450 : ae_int_t nout;
20451 : ae_int_t wcount;
20452 :
20453 0 : *pcount = 0;
20454 :
20455 0 : ae_assert(network->structinfo.cnt>0&&network->structinfo.cnt>=network->structinfo.ptr.p_int[0], "MLPExportTunableParameters: Network is uninitialized", _state);
20456 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
20457 0 : if( mlpissoftmax(network, _state) )
20458 : {
20459 0 : *pcount = wcount+2*nin;
20460 0 : rvectorsetlengthatleast(p, *pcount, _state);
20461 0 : k = 0;
20462 0 : for(i=0; i<=wcount-1; i++)
20463 : {
20464 0 : p->ptr.p_double[k] = network->weights.ptr.p_double[i];
20465 0 : k = k+1;
20466 : }
20467 0 : for(i=0; i<=nin-1; i++)
20468 : {
20469 0 : p->ptr.p_double[k] = network->columnmeans.ptr.p_double[i];
20470 0 : k = k+1;
20471 0 : p->ptr.p_double[k] = network->columnsigmas.ptr.p_double[i];
20472 0 : k = k+1;
20473 : }
20474 : }
20475 : else
20476 : {
20477 0 : *pcount = wcount+2*(nin+nout);
20478 0 : rvectorsetlengthatleast(p, *pcount, _state);
20479 0 : k = 0;
20480 0 : for(i=0; i<=wcount-1; i++)
20481 : {
20482 0 : p->ptr.p_double[k] = network->weights.ptr.p_double[i];
20483 0 : k = k+1;
20484 : }
20485 0 : for(i=0; i<=nin+nout-1; i++)
20486 : {
20487 0 : p->ptr.p_double[k] = network->columnmeans.ptr.p_double[i];
20488 0 : k = k+1;
20489 0 : p->ptr.p_double[k] = network->columnsigmas.ptr.p_double[i];
20490 0 : k = k+1;
20491 : }
20492 : }
20493 0 : }
20494 :
20495 :
20496 : /*************************************************************************
20497 : This function imports tunable parameters (weights/means/sigmas) which
20498 : were exported by MLPExportTunableParameters().
20499 :
20500 : It is intended for fast copying of states between a network and a backup array.
20501 :
20502 : INPUT PARAMETERS:
20503 : Network - target:
20504 : * must be correctly initialized
20505 : * must have same geometry as network used to export params
20506 : P - array with parameters
20507 :
20508 : -- ALGLIB --
20509 : Copyright 20.06.2013 by Bochkanov Sergey
20510 : *************************************************************************/
20511 0 : void mlpimporttunableparameters(multilayerperceptron* network,
20512 : /* Real */ ae_vector* p,
20513 : ae_state *_state)
20514 : {
20515 : ae_int_t i;
20516 : ae_int_t k;
20517 : ae_int_t nin;
20518 : ae_int_t nout;
20519 : ae_int_t wcount;
20520 :
20521 :
20522 0 : ae_assert(network->structinfo.cnt>0&&network->structinfo.cnt>=network->structinfo.ptr.p_int[0], "MLPImportTunableParameters: Network is uninitialized", _state);
20523 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
20524 0 : if( mlpissoftmax(network, _state) )
20525 : {
20526 0 : k = 0;
20527 0 : for(i=0; i<=wcount-1; i++)
20528 : {
20529 0 : network->weights.ptr.p_double[i] = p->ptr.p_double[k];
20530 0 : k = k+1;
20531 : }
20532 0 : for(i=0; i<=nin-1; i++)
20533 : {
20534 0 : network->columnmeans.ptr.p_double[i] = p->ptr.p_double[k];
20535 0 : k = k+1;
20536 0 : network->columnsigmas.ptr.p_double[i] = p->ptr.p_double[k];
20537 0 : k = k+1;
20538 : }
20539 : }
20540 : else
20541 : {
20542 0 : k = 0;
20543 0 : for(i=0; i<=wcount-1; i++)
20544 : {
20545 0 : network->weights.ptr.p_double[i] = p->ptr.p_double[k];
20546 0 : k = k+1;
20547 : }
20548 0 : for(i=0; i<=nin+nout-1; i++)
20549 : {
20550 0 : network->columnmeans.ptr.p_double[i] = p->ptr.p_double[k];
20551 0 : k = k+1;
20552 0 : network->columnsigmas.ptr.p_double[i] = p->ptr.p_double[k];
20553 0 : k = k+1;
20554 : }
20555 : }
20556 0 : }
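/*
 * Usage sketch (illustrative only): export/import round-trip for backing up
 * tunable parameters before an experiment and rolling them back afterwards:
 *
 *     ae_vector p;
 *     ae_int_t pcount;
 *     ae_vector_init(&p, 0, DT_REAL, &st, ae_false);
 *     mlpexporttunableparameters(&net, &p, &pcount, &st);
 *     ...   // modify/train the network
 *     mlpimporttunableparameters(&net, &p, &st);  // restore the backup
 */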
20557 :
20558 :
20559 : /*************************************************************************
20560 : Serialization of MultiLayerPerceptron structure
20561 :
20562 : INPUT PARAMETERS:
20563 : Network - original
20564 :
20565 : OUTPUT PARAMETERS:
20566 : RA - array of real numbers which stores network,
20567 : array[0..RLen-1]
20568 : RLen - RA length
20569 :
20570 : -- ALGLIB --
20571 : Copyright 29.03.2008 by Bochkanov Sergey
20572 : *************************************************************************/
20573 0 : void mlpserializeold(multilayerperceptron* network,
20574 : /* Real */ ae_vector* ra,
20575 : ae_int_t* rlen,
20576 : ae_state *_state)
20577 : {
20578 : ae_int_t i;
20579 : ae_int_t ssize;
20580 : ae_int_t nin;
20581 : ae_int_t nout;
20582 : ae_int_t wcount;
20583 : ae_int_t sigmalen;
20584 : ae_int_t offs;
20585 :
20586 0 : ae_vector_clear(ra);
20587 0 : *rlen = 0;
20588 :
20589 :
20590 : /*
20591 : * Unload info
20592 : */
20593 0 : ssize = network->structinfo.ptr.p_int[0];
20594 0 : nin = network->structinfo.ptr.p_int[1];
20595 0 : nout = network->structinfo.ptr.p_int[2];
20596 0 : wcount = network->structinfo.ptr.p_int[4];
20597 0 : if( mlpissoftmax(network, _state) )
20598 : {
20599 0 : sigmalen = nin;
20600 : }
20601 : else
20602 : {
20603 0 : sigmalen = nin+nout;
20604 : }
20605 :
20606 : /*
20607 : * RA format:
20608 : * LEN DESCR.
20609 : * 1 RLen
20610 : * 1 version (MLPVNum)
20611 : * 1 StructInfo size
20612 : * SSize StructInfo
20613 : * WCount Weights
20614 : * SigmaLen ColumnMeans
20615 : * SigmaLen ColumnSigmas
20616 : */
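/*
 * Worked example (illustrative numbers only): with SSize=10, WCount=25 and
 * SigmaLen=4 we get RLen = 3+10+25+2*4 = 46, so StructInfo occupies
 * RA[3..12], weights RA[13..37], means RA[38..41] and sigmas RA[42..45].
 */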
20617 0 : *rlen = 3+ssize+wcount+2*sigmalen;
20618 0 : ae_vector_set_length(ra, *rlen-1+1, _state);
20619 0 : ra->ptr.p_double[0] = (double)(*rlen);
20620 0 : ra->ptr.p_double[1] = (double)(mlpbase_mlpvnum);
20621 0 : ra->ptr.p_double[2] = (double)(ssize);
20622 0 : offs = 3;
20623 0 : for(i=0; i<=ssize-1; i++)
20624 : {
20625 0 : ra->ptr.p_double[offs+i] = (double)(network->structinfo.ptr.p_int[i]);
20626 : }
20627 0 : offs = offs+ssize;
20628 0 : ae_v_move(&ra->ptr.p_double[offs], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(offs,offs+wcount-1));
20629 0 : offs = offs+wcount;
20630 0 : ae_v_move(&ra->ptr.p_double[offs], 1, &network->columnmeans.ptr.p_double[0], 1, ae_v_len(offs,offs+sigmalen-1));
20631 0 : offs = offs+sigmalen;
20632 0 : ae_v_move(&ra->ptr.p_double[offs], 1, &network->columnsigmas.ptr.p_double[0], 1, ae_v_len(offs,offs+sigmalen-1));
20633 0 : offs = offs+sigmalen;
20634 0 : }
20635 :
20636 :
20637 : /*************************************************************************
20638 : Unserialization of MultiLayerPerceptron structure
20639 :
20640 : INPUT PARAMETERS:
20641 : RA - real array which stores network
20642 :
20643 : OUTPUT PARAMETERS:
20644 : Network - restored network
20645 :
20646 : -- ALGLIB --
20647 : Copyright 29.03.2008 by Bochkanov Sergey
20648 : *************************************************************************/
20649 0 : void mlpunserializeold(/* Real */ ae_vector* ra,
20650 : multilayerperceptron* network,
20651 : ae_state *_state)
20652 : {
20653 : ae_int_t i;
20654 : ae_int_t ssize;
20655 : ae_int_t ntotal;
20656 : ae_int_t nin;
20657 : ae_int_t nout;
20658 : ae_int_t wcount;
20659 : ae_int_t sigmalen;
20660 : ae_int_t offs;
20661 :
20662 0 : _multilayerperceptron_clear(network);
20663 :
20664 0 : ae_assert(ae_round(ra->ptr.p_double[1], _state)==mlpbase_mlpvnum, "MLPUnserialize: incorrect array!", _state);
20665 :
20666 : /*
20667 : * Unload StructInfo from IA
20668 : */
20669 0 : offs = 3;
20670 0 : ssize = ae_round(ra->ptr.p_double[2], _state);
20671 0 : ae_vector_set_length(&network->structinfo, ssize-1+1, _state);
20672 0 : for(i=0; i<=ssize-1; i++)
20673 : {
20674 0 : network->structinfo.ptr.p_int[i] = ae_round(ra->ptr.p_double[offs+i], _state);
20675 : }
20676 0 : offs = offs+ssize;
20677 :
20678 : /*
20679 : * Unload info from StructInfo
20680 : */
20681 0 : ssize = network->structinfo.ptr.p_int[0];
20682 0 : nin = network->structinfo.ptr.p_int[1];
20683 0 : nout = network->structinfo.ptr.p_int[2];
20684 0 : ntotal = network->structinfo.ptr.p_int[3];
20685 0 : wcount = network->structinfo.ptr.p_int[4];
20686 0 : if( network->structinfo.ptr.p_int[6]==0 )
20687 : {
20688 0 : sigmalen = nin+nout;
20689 : }
20690 : else
20691 : {
20692 0 : sigmalen = nin;
20693 : }
20694 :
20695 : /*
20696 : * Allocate space for other fields
20697 : */
20698 0 : ae_vector_set_length(&network->weights, wcount-1+1, _state);
20699 0 : ae_vector_set_length(&network->columnmeans, sigmalen-1+1, _state);
20700 0 : ae_vector_set_length(&network->columnsigmas, sigmalen-1+1, _state);
20701 0 : ae_vector_set_length(&network->neurons, ntotal-1+1, _state);
20702 0 : ae_vector_set_length(&network->nwbuf, ae_maxint(wcount, 2*nout, _state)-1+1, _state);
20703 0 : ae_vector_set_length(&network->dfdnet, ntotal-1+1, _state);
20704 0 : ae_vector_set_length(&network->x, nin-1+1, _state);
20705 0 : ae_vector_set_length(&network->y, nout-1+1, _state);
20706 0 : ae_vector_set_length(&network->derror, ntotal-1+1, _state);
20707 :
20708 : /*
20709 : * Copy parameters from RA
20710 : */
20711 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,wcount-1));
20712 0 : offs = offs+wcount;
20713 0 : ae_v_move(&network->columnmeans.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,sigmalen-1));
20714 0 : offs = offs+sigmalen;
20715 0 : ae_v_move(&network->columnsigmas.ptr.p_double[0], 1, &ra->ptr.p_double[offs], 1, ae_v_len(0,sigmalen-1));
20716 0 : offs = offs+sigmalen;
20717 0 : }
20718 :
20719 :
20720 : /*************************************************************************
20721 : Randomization of neural network weights
20722 :
20723 : -- ALGLIB --
20724 : Copyright 06.11.2007 by Bochkanov Sergey
20725 : *************************************************************************/
20726 0 : void mlprandomize(multilayerperceptron* network, ae_state *_state)
20727 : {
20728 : ae_frame _frame_block;
20729 : ae_int_t nin;
20730 : ae_int_t nout;
20731 : ae_int_t wcount;
20732 : ae_int_t ntotal;
20733 : ae_int_t istart;
20734 : hqrndstate r;
20735 : ae_int_t entrysize;
20736 : ae_int_t entryoffs;
20737 : ae_int_t neuronidx;
20738 : ae_int_t neurontype;
20739 : double vmean;
20740 : double vvar;
20741 : ae_int_t i;
20742 : ae_int_t n1;
20743 : ae_int_t n2;
20744 : double desiredsigma;
20745 : ae_int_t montecarlocnt;
20746 : double ef;
20747 : double ef2;
20748 : double v;
20749 : double wscale;
20750 :
20751 0 : ae_frame_make(_state, &_frame_block);
20752 0 : memset(&r, 0, sizeof(r));
20753 0 : _hqrndstate_init(&r, _state, ae_true);
20754 :
20755 0 : hqrndrandomize(&r, _state);
20756 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
20757 0 : ntotal = network->structinfo.ptr.p_int[3];
20758 0 : istart = network->structinfo.ptr.p_int[5];
20759 0 : desiredsigma = 0.5;
20760 0 : montecarlocnt = 20;
20761 :
20762 : /*
20763 : * Stage 1:
20764 : * * Network.Weights is filled by standard deviation of weights
20765 : * * default values: sigma=1
20766 : */
20767 0 : for(i=0; i<=wcount-1; i++)
20768 : {
20769 0 : network->weights.ptr.p_double[i] = 1.0;
20770 : }
20771 :
20772 : /*
20773 : * Stage 2:
20774 : * * assume that input neurons have zero mean and unit standard deviation
20775 : * * assume that constant neurons have zero standard deviation
20776 : * * perform forward pass along neurons
20777 : * * for each non-input non-constant neuron:
20778 : * * calculate mean and standard deviation of neuron's output
20779 : * assuming that we know means/deviations of neurons which feed it
20780 : * and assuming that weights have unit variance and zero mean.
20781 : * * for each nonlinear neuron we additionally perform a backward pass:
20782 : * * scale variances of weights which feed it in such a way that the
20783 : * neuron's input has unit standard deviation
20784 : *
20785 : * NOTE: this algorithm assumes that each connection feeds at most one
20786 : * non-linear neuron. This assumption can be incorrect in upcoming
20787 : * architectures with strong neurons. However, the algorithm should
20788 : * work smoothly even in this case.
20789 : *
20790 : * During this stage we use Network.RndBuf, which is grouped into NTotal
20791 : * entries, each of them having following format:
20792 : *
20793 : * Buf[Offset+0] mean value of neuron's output
20794 : * Buf[Offset+1] standard deviation of neuron's output
20795 : *
20796 : *
20797 : */
20798 0 : entrysize = 2;
20799 0 : rvectorsetlengthatleast(&network->rndbuf, entrysize*ntotal, _state);
20800 0 : for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
20801 : {
20802 0 : neurontype = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+0];
20803 0 : entryoffs = entrysize*neuronidx;
20804 0 : if( neurontype==-2 )
20805 : {
20806 :
20807 : /*
20808 : * Input neuron: zero mean, unit variance.
20809 : */
20810 0 : network->rndbuf.ptr.p_double[entryoffs+0] = 0.0;
20811 0 : network->rndbuf.ptr.p_double[entryoffs+1] = 1.0;
20812 0 : continue;
20813 : }
20814 0 : if( neurontype==-3 )
20815 : {
20816 :
20817 : /*
20818 : * "-1" neuron: mean=-1, zero variance.
20819 : */
20820 0 : network->rndbuf.ptr.p_double[entryoffs+0] = -1.0;
20821 0 : network->rndbuf.ptr.p_double[entryoffs+1] = 0.0;
20822 0 : continue;
20823 : }
20824 0 : if( neurontype==-4 )
20825 : {
20826 :
20827 : /*
20828 : * "0" neuron: mean=0, zero variance.
20829 : */
20830 0 : network->rndbuf.ptr.p_double[entryoffs+0] = 0.0;
20831 0 : network->rndbuf.ptr.p_double[entryoffs+1] = 0.0;
20832 0 : continue;
20833 : }
20834 0 : if( neurontype==0 )
20835 : {
20836 :
20837 : /*
20838 : * Adaptive summator neuron:
20839 : * * calculate its mean and variance.
20840 : * * we assume that weights of this neuron have unit variance and zero mean.
20841 : * * thus, neuron's output is always have zero mean
20842 : * * as for variance, it is a bit more interesting:
20843 : * * let n[i] is i-th input neuron
20844 : * * let w[i] is i-th weight
20845 : * * we assume that n[i] and w[i] are independently distributed
20846 : * * Var(n0*w0+n1*w1+...) = Var(n0*w0)+Var(n1*w1)+...
20847 : * * Var(X*Y) = mean(X)^2*Var(Y) + mean(Y)^2*Var(X) + Var(X)*Var(Y)
20848 : * * mean(w[i])=0, var(w[i])=1
20849 : * * Var(n[i]*w[i]) = mean(n[i])^2 + Var(n[i])
20850 : */
20851 0 : n1 = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
20852 0 : n2 = n1+network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+1]-1;
20853 0 : vmean = 0.0;
20854 0 : vvar = 0.0;
20855 0 : for(i=n1; i<=n2; i++)
20856 : {
20857 0 : vvar = vvar+ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+0], _state)+ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
20858 : }
20859 0 : network->rndbuf.ptr.p_double[entryoffs+0] = vmean;
20860 0 : network->rndbuf.ptr.p_double[entryoffs+1] = ae_sqrt(vvar, _state);
20861 0 : continue;
20862 : }
20863 0 : if( neurontype==-5 )
20864 : {
20865 :
20866 : /*
20867 : * Linear activation function
20868 : */
20869 0 : i = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
20870 0 : vmean = network->rndbuf.ptr.p_double[entrysize*i+0];
20871 0 : vvar = ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
20872 0 : if( ae_fp_greater(vvar,(double)(0)) )
20873 : {
20874 0 : wscale = desiredsigma/ae_sqrt(vvar, _state);
20875 : }
20876 : else
20877 : {
20878 0 : wscale = 1.0;
20879 : }
20880 0 : mlpbase_randomizebackwardpass(network, i, wscale, _state);
20881 0 : network->rndbuf.ptr.p_double[entryoffs+0] = vmean*wscale;
20882 0 : network->rndbuf.ptr.p_double[entryoffs+1] = desiredsigma;
20883 0 : continue;
20884 : }
20885 0 : if( neurontype>0 )
20886 : {
20887 :
20888 : /*
20889 : * Nonlinear activation function:
20890 : * * scale its inputs
20891 : * * estimate mean/sigma of its output using Monte-Carlo method
20892 : * (we simulate different inputs with unit deviation and
20893 : * sample activation function output on such inputs)
20894 : */
20895 0 : i = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+2];
20896 0 : vmean = network->rndbuf.ptr.p_double[entrysize*i+0];
20897 0 : vvar = ae_sqr(network->rndbuf.ptr.p_double[entrysize*i+1], _state);
20898 0 : if( ae_fp_greater(vvar,(double)(0)) )
20899 : {
20900 0 : wscale = desiredsigma/ae_sqrt(vvar, _state);
20901 : }
20902 : else
20903 : {
20904 0 : wscale = 1.0;
20905 : }
20906 0 : mlpbase_randomizebackwardpass(network, i, wscale, _state);
20907 0 : ef = 0.0;
20908 0 : ef2 = 0.0;
20909 0 : vmean = vmean*wscale;
20910 0 : for(i=0; i<=montecarlocnt-1; i++)
20911 : {
20912 0 : v = vmean+desiredsigma*hqrndnormal(&r, _state);
20913 0 : ef = ef+v;
20914 0 : ef2 = ef2+v*v;
20915 : }
20916 0 : ef = ef/montecarlocnt;
20917 0 : ef2 = ef2/montecarlocnt;
20918 0 : network->rndbuf.ptr.p_double[entryoffs+0] = ef;
20919 0 : network->rndbuf.ptr.p_double[entryoffs+1] = ae_sqrt(ae_maxreal(ef2-ef*ef, 0.0, _state), _state); /* RndBuf entry stores std.dev., not variance (see format above) */
20920 0 : continue;
20921 : }
20922 0 : ae_assert(ae_false, "MLPRandomize: unexpected neuron type", _state);
20923 : }
20924 :
20925 : /*
20926 : * Stage 3: generate weights.
20927 : */
20928 0 : for(i=0; i<=wcount-1; i++)
20929 : {
20930 0 : network->weights.ptr.p_double[i] = network->weights.ptr.p_double[i]*hqrndnormal(&r, _state);
20931 : }
20932 0 : ae_frame_leave(_state);
20933 0 : }
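/*
 * Usage sketch (illustrative only): re-seed the weights before a restart of
 * training, e.g. when running multi-start optimization:
 *
 *     mlprandomize(&net, &st);
 *     ...   // train from the new random starting point
 */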
20934 :
20935 :
20936 : /*************************************************************************
20937 : Randomization of neural network weights and standardizer
20938 :
20939 : -- ALGLIB --
20940 : Copyright 10.03.2008 by Bochkanov Sergey
20941 : *************************************************************************/
20942 0 : void mlprandomizefull(multilayerperceptron* network, ae_state *_state)
20943 : {
20944 : ae_int_t i;
20945 : ae_int_t nin;
20946 : ae_int_t nout;
20947 : ae_int_t wcount;
20948 : ae_int_t ntotal;
20949 : ae_int_t istart;
20950 : ae_int_t offs;
20951 : ae_int_t ntype;
20952 :
20953 :
20954 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
20955 0 : ntotal = network->structinfo.ptr.p_int[3];
20956 0 : istart = network->structinfo.ptr.p_int[5];
20957 :
20958 : /*
20959 : * Process network
20960 : */
20961 0 : mlprandomize(network, _state);
20962 0 : for(i=0; i<=nin-1; i++)
20963 : {
20964 0 : network->columnmeans.ptr.p_double[i] = ae_randomreal(_state)-0.5;
20965 0 : network->columnsigmas.ptr.p_double[i] = ae_randomreal(_state)+0.5;
20966 : }
20967 0 : if( !mlpissoftmax(network, _state) )
20968 : {
20969 0 : for(i=0; i<=nout-1; i++)
20970 : {
20971 0 : offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
20972 0 : ntype = network->structinfo.ptr.p_int[offs+0];
20973 0 : if( ntype==0 )
20974 : {
20975 :
20976 : /*
20977 : * Shifts are changed only for linear outputs neurons
20978 : */
20979 0 : network->columnmeans.ptr.p_double[nin+i] = 2*ae_randomreal(_state)-1;
20980 : }
20981 0 : if( ntype==0||ntype==3 )
20982 : {
20983 :
20984 : /*
20985 : * Scales are changed only for linear or bounded outputs neurons.
20986 : * Note that scale randomization preserves sign.
20987 : */
20988 0 : network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*(1.5*ae_randomreal(_state)+0.5);
20989 : }
20990 : }
20991 : }
20992 0 : }
20993 :
20994 :
20995 : /*************************************************************************
20996 : Internal subroutine.
20997 :
20998 : -- ALGLIB --
20999 : Copyright 30.03.2008 by Bochkanov Sergey
21000 : *************************************************************************/
21001 0 : void mlpinitpreprocessor(multilayerperceptron* network,
21002 : /* Real */ ae_matrix* xy,
21003 : ae_int_t ssize,
21004 : ae_state *_state)
21005 : {
21006 : ae_frame _frame_block;
21007 : ae_int_t i;
21008 : ae_int_t j;
21009 : ae_int_t jmax;
21010 : ae_int_t nin;
21011 : ae_int_t nout;
21012 : ae_int_t wcount;
21013 : ae_int_t ntotal;
21014 : ae_int_t istart;
21015 : ae_int_t offs;
21016 : ae_int_t ntype;
21017 : ae_vector means;
21018 : ae_vector sigmas;
21019 : double s;
21020 :
21021 0 : ae_frame_make(_state, &_frame_block);
21022 0 : memset(&means, 0, sizeof(means));
21023 0 : memset(&sigmas, 0, sizeof(sigmas));
21024 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
21025 0 : ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
21026 :
21027 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
21028 0 : ntotal = network->structinfo.ptr.p_int[3];
21029 0 : istart = network->structinfo.ptr.p_int[5];
21030 :
21031 : /*
21032 : * Means/Sigmas
21033 : */
21034 0 : if( mlpissoftmax(network, _state) )
21035 : {
21036 0 : jmax = nin-1;
21037 : }
21038 : else
21039 : {
21040 0 : jmax = nin+nout-1;
21041 : }
21042 0 : ae_vector_set_length(&means, jmax+1, _state);
21043 0 : ae_vector_set_length(&sigmas, jmax+1, _state);
21044 0 : for(i=0; i<=jmax; i++)
21045 : {
21046 0 : means.ptr.p_double[i] = (double)(0);
21047 0 : sigmas.ptr.p_double[i] = (double)(0);
21048 : }
21049 0 : for(i=0; i<=ssize-1; i++)
21050 : {
21051 0 : for(j=0; j<=jmax; j++)
21052 : {
21053 0 : means.ptr.p_double[j] = means.ptr.p_double[j]+xy->ptr.pp_double[i][j];
21054 : }
21055 : }
21056 0 : for(i=0; i<=jmax; i++)
21057 : {
21058 0 : means.ptr.p_double[i] = means.ptr.p_double[i]/ssize;
21059 : }
21060 0 : for(i=0; i<=ssize-1; i++)
21061 : {
21062 0 : for(j=0; j<=jmax; j++)
21063 : {
21064 0 : sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(xy->ptr.pp_double[i][j]-means.ptr.p_double[j], _state);
21065 : }
21066 : }
21067 0 : for(i=0; i<=jmax; i++)
21068 : {
21069 0 : sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/ssize, _state);
21070 : }
21071 :
21072 : /*
21073 : * Inputs
21074 : */
21075 0 : for(i=0; i<=nin-1; i++)
21076 : {
21077 0 : network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
21078 0 : network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
21079 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
21080 : {
21081 0 : network->columnsigmas.ptr.p_double[i] = (double)(1);
21082 : }
21083 : }
21084 :
21085 : /*
21086 : * Outputs
21087 : */
21088 0 : if( !mlpissoftmax(network, _state) )
21089 : {
21090 0 : for(i=0; i<=nout-1; i++)
21091 : {
21092 0 : offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
21093 0 : ntype = network->structinfo.ptr.p_int[offs+0];
21094 :
21095 : /*
21096 : * Linear outputs
21097 : */
21098 0 : if( ntype==0 )
21099 : {
21100 0 : network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
21101 0 : network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
21102 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21103 : {
21104 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21105 : }
21106 : }
21107 :
21108 : /*
21109 : * Bounded outputs (half-interval)
21110 : */
21111 0 : if( ntype==3 )
21112 : {
21113 0 : s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
21114 0 : if( ae_fp_eq(s,(double)(0)) )
21115 : {
21116 0 : s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
21117 : }
21118 0 : if( ae_fp_eq(s,(double)(0)) )
21119 : {
21120 0 : s = 1.0;
21121 : }
21122 0 : network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
21123 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21124 : {
21125 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21126 : }
21127 : }
21128 : }
21129 : }
21130 0 : ae_frame_leave(_state);
21131 0 : }
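/*
 * Usage sketch (illustrative only; xy is a dense dataset with ssize rows):
 *
 *     mlpinitpreprocessor(&net, &xy, ssize, &st);
 *     // afterwards the I-th input is standardized as (x-mean)/sigma,
 *     // with sigma forced to 1 for constant columns
 */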
21132 :
21133 :
21134 : /*************************************************************************
21135 : Internal subroutine.
21136 : Initialization for preprocessor based on a sample.
21137 :
21138 : INPUT
21139 : Network - initialized neural network;
21140 : XY - sample, given by sparse matrix;
21141 : SSize - sample size.
21142 :
21143 : OUTPUT
21144 : Network - neural network with initialised preprocessor.
21145 :
21146 : -- ALGLIB --
21147 : Copyright 26.07.2012 by Bochkanov Sergey
21148 : *************************************************************************/
21149 0 : void mlpinitpreprocessorsparse(multilayerperceptron* network,
21150 : sparsematrix* xy,
21151 : ae_int_t ssize,
21152 : ae_state *_state)
21153 : {
21154 : ae_frame _frame_block;
21155 : ae_int_t jmax;
21156 : ae_int_t nin;
21157 : ae_int_t nout;
21158 : ae_int_t wcount;
21159 : ae_int_t ntotal;
21160 : ae_int_t istart;
21161 : ae_int_t offs;
21162 : ae_int_t ntype;
21163 : ae_vector means;
21164 : ae_vector sigmas;
21165 : double s;
21166 : ae_int_t i;
21167 : ae_int_t j;
21168 :
21169 0 : ae_frame_make(_state, &_frame_block);
21170 0 : memset(&means, 0, sizeof(means));
21171 0 : memset(&sigmas, 0, sizeof(sigmas));
21172 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
21173 0 : ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
21174 :
21175 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
21176 0 : ntotal = network->structinfo.ptr.p_int[3];
21177 0 : istart = network->structinfo.ptr.p_int[5];
21178 :
21179 : /*
21180 : * Means/Sigmas
21181 : */
21182 0 : if( mlpissoftmax(network, _state) )
21183 : {
21184 0 : jmax = nin-1;
21185 : }
21186 : else
21187 : {
21188 0 : jmax = nin+nout-1;
21189 : }
21190 0 : ae_vector_set_length(&means, jmax+1, _state);
21191 0 : ae_vector_set_length(&sigmas, jmax+1, _state);
21192 0 : for(i=0; i<=jmax; i++)
21193 : {
21194 0 : means.ptr.p_double[i] = (double)(0);
21195 0 : sigmas.ptr.p_double[i] = (double)(0);
21196 : }
21197 0 : for(i=0; i<=ssize-1; i++)
21198 : {
21199 0 : sparsegetrow(xy, i, &network->xyrow, _state);
21200 0 : for(j=0; j<=jmax; j++)
21201 : {
21202 0 : means.ptr.p_double[j] = means.ptr.p_double[j]+network->xyrow.ptr.p_double[j];
21203 : }
21204 : }
21205 0 : for(i=0; i<=jmax; i++)
21206 : {
21207 0 : means.ptr.p_double[i] = means.ptr.p_double[i]/ssize;
21208 : }
21209 0 : for(i=0; i<=ssize-1; i++)
21210 : {
21211 0 : sparsegetrow(xy, i, &network->xyrow, _state);
21212 0 : for(j=0; j<=jmax; j++)
21213 : {
21214 0 : sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(network->xyrow.ptr.p_double[j]-means.ptr.p_double[j], _state);
21215 : }
21216 : }
21217 0 : for(i=0; i<=jmax; i++)
21218 : {
21219 0 : sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/ssize, _state);
21220 : }
21221 :
21222 : /*
21223 : * Inputs
21224 : */
21225 0 : for(i=0; i<=nin-1; i++)
21226 : {
21227 0 : network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
21228 0 : network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
21229 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
21230 : {
21231 0 : network->columnsigmas.ptr.p_double[i] = (double)(1);
21232 : }
21233 : }
21234 :
21235 : /*
21236 : * Outputs
21237 : */
21238 0 : if( !mlpissoftmax(network, _state) )
21239 : {
21240 0 : for(i=0; i<=nout-1; i++)
21241 : {
21242 0 : offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
21243 0 : ntype = network->structinfo.ptr.p_int[offs+0];
21244 :
21245 : /*
21246 : * Linear outputs
21247 : */
21248 0 : if( ntype==0 )
21249 : {
21250 0 : network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
21251 0 : network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
21252 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21253 : {
21254 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21255 : }
21256 : }
21257 :
21258 : /*
21259 : * Bounded outputs (half-interval)
21260 : */
21261 0 : if( ntype==3 )
21262 : {
21263 0 : s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
21264 0 : if( ae_fp_eq(s,(double)(0)) )
21265 : {
21266 0 : s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
21267 : }
21268 0 : if( ae_fp_eq(s,(double)(0)) )
21269 : {
21270 0 : s = 1.0;
21271 : }
21272 0 : network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
21273 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21274 : {
21275 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21276 : }
21277 : }
21278 : }
21279 : }
21280 0 : ae_frame_leave(_state);
21281 0 : }
21282 :
21283 :
21284 : /*************************************************************************
21285 : Internal subroutine.
21286 : Initialization for preprocessor based on a subsample.
21287 :
21288 : INPUT PARAMETERS:
21289 : Network - network initialized with one of the network creation funcs
21290 : XY - original dataset; one sample = one row;
21291 : first NIn columns contain inputs,
21292 : next NOut columns - desired outputs.
21293 : SetSize - real size of XY, SetSize>=0;
21294 : Idx - subset of SubsetSize elements, array[SubsetSize]:
21295 : * Idx[I] stores row index in the original dataset which is
21296 : given by XY. Gradient is calculated with respect to rows
21297 : whose indexes are stored in Idx[].
21298 : * Idx[] must store correct indexes; this function throws
21299 : an exception in case an incorrect index (less than 0 or
21300 : larger than rows(XY)-1) is given
21301 : * Idx[] may store indexes in any order and even with
21302 : repetitions.
21303 : SubsetSize- number of elements in Idx[] array.
21304 :
21305 : OUTPUT:
21306 : Network - neural network with initialised preprocessor.
21307 :
21308 : NOTE: when SubsetSize<0, the full dataset is used, via a call to the
21309 : MLPInitPreprocessor function.
21310 :
21311 : -- ALGLIB --
21312 : Copyright 23.08.2012 by Bochkanov Sergey
21313 : *************************************************************************/
21314 0 : void mlpinitpreprocessorsubset(multilayerperceptron* network,
21315 : /* Real */ ae_matrix* xy,
21316 : ae_int_t setsize,
21317 : /* Integer */ ae_vector* idx,
21318 : ae_int_t subsetsize,
21319 : ae_state *_state)
21320 : {
21321 : ae_frame _frame_block;
21322 : ae_int_t jmax;
21323 : ae_int_t nin;
21324 : ae_int_t nout;
21325 : ae_int_t wcount;
21326 : ae_int_t ntotal;
21327 : ae_int_t istart;
21328 : ae_int_t offs;
21329 : ae_int_t ntype;
21330 : ae_vector means;
21331 : ae_vector sigmas;
21332 : double s;
21333 : ae_int_t npoints;
21334 : ae_int_t i;
21335 : ae_int_t j;
21336 :
21337 0 : ae_frame_make(_state, &_frame_block);
21338 0 : memset(&means, 0, sizeof(means));
21339 0 : memset(&sigmas, 0, sizeof(sigmas));
21340 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
21341 0 : ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
21342 :
21343 0 : ae_assert(setsize>=0, "MLPInitPreprocessorSubset: SetSize<0", _state);
21344 0 : if( subsetsize<0 )
21345 : {
21346 0 : mlpinitpreprocessor(network, xy, setsize, _state);
21347 0 : ae_frame_leave(_state);
21348 0 : return;
21349 : }
21350 0 : ae_assert(subsetsize<=idx->cnt, "MLPInitPreprocessorSubset: SubsetSize>Length(Idx)", _state);
21351 0 : npoints = setsize;
21352 0 : for(i=0; i<=subsetsize-1; i++)
21353 : {
21354 0 : ae_assert(idx->ptr.p_int[i]>=0, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]<0)", _state);
21355 0 : ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
21356 : }
21357 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
21358 0 : ntotal = network->structinfo.ptr.p_int[3];
21359 0 : istart = network->structinfo.ptr.p_int[5];
21360 :
21361 : /*
21362 : * Means/Sigmas
21363 : */
21364 0 : if( mlpissoftmax(network, _state) )
21365 : {
21366 0 : jmax = nin-1;
21367 : }
21368 : else
21369 : {
21370 0 : jmax = nin+nout-1;
21371 : }
21372 0 : ae_vector_set_length(&means, jmax+1, _state);
21373 0 : ae_vector_set_length(&sigmas, jmax+1, _state);
21374 0 : for(i=0; i<=jmax; i++)
21375 : {
21376 0 : means.ptr.p_double[i] = (double)(0);
21377 0 : sigmas.ptr.p_double[i] = (double)(0);
21378 : }
21379 0 : for(i=0; i<=subsetsize-1; i++)
21380 : {
21381 0 : for(j=0; j<=jmax; j++)
21382 : {
21383 0 : means.ptr.p_double[j] = means.ptr.p_double[j]+xy->ptr.pp_double[idx->ptr.p_int[i]][j];
21384 : }
21385 : }
21386 0 : for(i=0; i<=jmax; i++)
21387 : {
21388 0 : means.ptr.p_double[i] = means.ptr.p_double[i]/subsetsize;
21389 : }
21390 0 : for(i=0; i<=subsetsize-1; i++)
21391 : {
21392 0 : for(j=0; j<=jmax; j++)
21393 : {
21394 0 : sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(xy->ptr.pp_double[idx->ptr.p_int[i]][j]-means.ptr.p_double[j], _state);
21395 : }
21396 : }
21397 0 : for(i=0; i<=jmax; i++)
21398 : {
21399 0 : sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/subsetsize, _state);
21400 : }
21401 :
21402 : /*
21403 : * Inputs
21404 : */
21405 0 : for(i=0; i<=nin-1; i++)
21406 : {
21407 0 : network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
21408 0 : network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
21409 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
21410 : {
21411 0 : network->columnsigmas.ptr.p_double[i] = (double)(1);
21412 : }
21413 : }
21414 :
21415 : /*
21416 : * Outputs
21417 : */
21418 0 : if( !mlpissoftmax(network, _state) )
21419 : {
21420 0 : for(i=0; i<=nout-1; i++)
21421 : {
21422 0 : offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
21423 0 : ntype = network->structinfo.ptr.p_int[offs+0];
21424 :
21425 : /*
21426 : * Linear outputs
21427 : */
21428 0 : if( ntype==0 )
21429 : {
21430 0 : network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
21431 0 : network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
21432 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21433 : {
21434 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21435 : }
21436 : }
21437 :
21438 : /*
21439 : * Bounded outputs (half-interval)
21440 : */
21441 0 : if( ntype==3 )
21442 : {
21443 0 : s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
21444 0 : if( ae_fp_eq(s,(double)(0)) )
21445 : {
21446 0 : s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
21447 : }
21448 0 : if( ae_fp_eq(s,(double)(0)) )
21449 : {
21450 0 : s = 1.0;
21451 : }
21452 0 : network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
21453 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21454 : {
21455 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21456 : }
21457 : }
21458 : }
21459 : }
21460 0 : ae_frame_leave(_state);
21461 : }
21462 :
21463 :
21464 : /*************************************************************************
21465 : Internal subroutine.
21466 : Initialization for preprocessor based on a subsample.
21467 :
21468 : INPUT PARAMETERS:
21469 : Network - network initialized with one of the network creation funcs
21470 : XY - original dataset, given by sparse matrix;
21471 : one sample = one row;
21472 : first NIn columns contain inputs,
21473 : next NOut columns - desired outputs.
21474 : SetSize - real size of XY, SetSize>=0;
21475 : Idx - subset of SubsetSize elements, array[SubsetSize]:
21476 : * Idx[I] stores row index in the original dataset which is
21477 : given by XY. Gradient is calculated with respect to rows
21478 : whose indexes are stored in Idx[].
21479 : * Idx[] must store correct indexes; this function throws
21480 : an exception in case an incorrect index (less than 0 or
21481 : larger than rows(XY)-1) is given
21482 : * Idx[] may store indexes in any order and even with
21483 : repetitions.
21484 : SubsetSize- number of elements in Idx[] array.
21485 :
21486 : OUTPUT:
21487 : Network - neural network with initialised preprocessor.
21488 :
21489 : NOTE: when SubsetSize<0, the full dataset is used, via a call to the
21490 : MLPInitPreprocessorSparse function.
21491 :
21492 : -- ALGLIB --
21493 : Copyright 26.07.2012 by Bochkanov Sergey
21494 : *************************************************************************/
21495 0 : void mlpinitpreprocessorsparsesubset(multilayerperceptron* network,
21496 : sparsematrix* xy,
21497 : ae_int_t setsize,
21498 : /* Integer */ ae_vector* idx,
21499 : ae_int_t subsetsize,
21500 : ae_state *_state)
21501 : {
21502 : ae_frame _frame_block;
21503 : ae_int_t jmax;
21504 : ae_int_t nin;
21505 : ae_int_t nout;
21506 : ae_int_t wcount;
21507 : ae_int_t ntotal;
21508 : ae_int_t istart;
21509 : ae_int_t offs;
21510 : ae_int_t ntype;
21511 : ae_vector means;
21512 : ae_vector sigmas;
21513 : double s;
21514 : ae_int_t npoints;
21515 : ae_int_t i;
21516 : ae_int_t j;
21517 :
21518 0 : ae_frame_make(_state, &_frame_block);
21519 0 : memset(&means, 0, sizeof(means));
21520 0 : memset(&sigmas, 0, sizeof(sigmas));
21521 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
21522 0 : ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
21523 :
21524 0 : ae_assert(setsize>=0, "MLPInitPreprocessorSparseSubset: SetSize<0", _state);
21525 0 : if( subsetsize<0 )
21526 : {
21527 0 : mlpinitpreprocessorsparse(network, xy, setsize, _state);
21528 0 : ae_frame_leave(_state);
21529 0 : return;
21530 : }
21531 0 : ae_assert(subsetsize<=idx->cnt, "MLPInitPreprocessorSparseSubset: SubsetSize>Length(Idx)", _state);
21532 0 : npoints = setsize;
21533 0 : for(i=0; i<=subsetsize-1; i++)
21534 : {
21535 0 : ae_assert(idx->ptr.p_int[i]>=0, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]<0)", _state);
21536 0 : ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
21537 : }
21538 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
21539 0 : ntotal = network->structinfo.ptr.p_int[3];
21540 0 : istart = network->structinfo.ptr.p_int[5];
21541 :
21542 : /*
21543 : * Means/Sigmas
21544 : */
21545 0 : if( mlpissoftmax(network, _state) )
21546 : {
21547 0 : jmax = nin-1;
21548 : }
21549 : else
21550 : {
21551 0 : jmax = nin+nout-1;
21552 : }
21553 0 : ae_vector_set_length(&means, jmax+1, _state);
21554 0 : ae_vector_set_length(&sigmas, jmax+1, _state);
21555 0 : for(i=0; i<=jmax; i++)
21556 : {
21557 0 : means.ptr.p_double[i] = (double)(0);
21558 0 : sigmas.ptr.p_double[i] = (double)(0);
21559 : }
21560 0 : for(i=0; i<=subsetsize-1; i++)
21561 : {
21562 0 : sparsegetrow(xy, idx->ptr.p_int[i], &network->xyrow, _state);
21563 0 : for(j=0; j<=jmax; j++)
21564 : {
21565 0 : means.ptr.p_double[j] = means.ptr.p_double[j]+network->xyrow.ptr.p_double[j];
21566 : }
21567 : }
21568 0 : for(i=0; i<=jmax; i++)
21569 : {
21570 0 : means.ptr.p_double[i] = means.ptr.p_double[i]/subsetsize;
21571 : }
21572 0 : for(i=0; i<=subsetsize-1; i++)
21573 : {
21574 0 : sparsegetrow(xy, idx->ptr.p_int[i], &network->xyrow, _state);
21575 0 : for(j=0; j<=jmax; j++)
21576 : {
21577 0 : sigmas.ptr.p_double[j] = sigmas.ptr.p_double[j]+ae_sqr(network->xyrow.ptr.p_double[j]-means.ptr.p_double[j], _state);
21578 : }
21579 : }
21580 0 : for(i=0; i<=jmax; i++)
21581 : {
21582 0 : sigmas.ptr.p_double[i] = ae_sqrt(sigmas.ptr.p_double[i]/subsetsize, _state);
21583 : }
21584 :
21585 : /*
21586 : * Inputs
21587 : */
21588 0 : for(i=0; i<=nin-1; i++)
21589 : {
21590 0 : network->columnmeans.ptr.p_double[i] = means.ptr.p_double[i];
21591 0 : network->columnsigmas.ptr.p_double[i] = sigmas.ptr.p_double[i];
21592 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
21593 : {
21594 0 : network->columnsigmas.ptr.p_double[i] = (double)(1);
21595 : }
21596 : }
21597 :
21598 : /*
21599 : * Outputs
21600 : */
21601 0 : if( !mlpissoftmax(network, _state) )
21602 : {
21603 0 : for(i=0; i<=nout-1; i++)
21604 : {
21605 0 : offs = istart+(ntotal-nout+i)*mlpbase_nfieldwidth;
21606 0 : ntype = network->structinfo.ptr.p_int[offs+0];
21607 :
21608 : /*
21609 : * Linear outputs
21610 : */
21611 0 : if( ntype==0 )
21612 : {
21613 0 : network->columnmeans.ptr.p_double[nin+i] = means.ptr.p_double[nin+i];
21614 0 : network->columnsigmas.ptr.p_double[nin+i] = sigmas.ptr.p_double[nin+i];
21615 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21616 : {
21617 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21618 : }
21619 : }
21620 :
21621 : /*
21622 : * Bounded outputs (half-interval)
21623 : */
21624 0 : if( ntype==3 )
21625 : {
21626 0 : s = means.ptr.p_double[nin+i]-network->columnmeans.ptr.p_double[nin+i];
21627 0 : if( ae_fp_eq(s,(double)(0)) )
21628 : {
21629 0 : s = (double)(ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state));
21630 : }
21631 0 : if( ae_fp_eq(s,(double)(0)) )
21632 : {
21633 0 : s = 1.0;
21634 : }
21635 0 : network->columnsigmas.ptr.p_double[nin+i] = ae_sign(network->columnsigmas.ptr.p_double[nin+i], _state)*ae_fabs(s, _state);
21636 0 : if( ae_fp_eq(network->columnsigmas.ptr.p_double[nin+i],(double)(0)) )
21637 : {
21638 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
21639 : }
21640 : }
21641 : }
21642 : }
21643 0 : ae_frame_leave(_state);
21644 : }
21645 :
21646 :
21647 : /*************************************************************************
21648 : Returns information about initialized network: number of inputs, outputs,
21649 : weights.
21650 :
21651 : -- ALGLIB --
21652 : Copyright 04.11.2007 by Bochkanov Sergey
21653 : *************************************************************************/
21654 0 : void mlpproperties(multilayerperceptron* network,
21655 : ae_int_t* nin,
21656 : ae_int_t* nout,
21657 : ae_int_t* wcount,
21658 : ae_state *_state)
21659 : {
21660 :
21661 0 : *nin = 0;
21662 0 : *nout = 0;
21663 0 : *wcount = 0;
21664 :
21665 0 : *nin = network->structinfo.ptr.p_int[1];
21666 0 : *nout = network->structinfo.ptr.p_int[2];
21667 0 : *wcount = network->structinfo.ptr.p_int[4];
21668 0 : }
21669 :
21670 :
21671 : /*************************************************************************
21672 : Returns the number of "internal", low-level neurons in the network (the
21673 : ones which are stored in StructInfo).
21674 :
21675 : -- ALGLIB --
21676 : Copyright 04.11.2007 by Bochkanov Sergey
21677 : *************************************************************************/
21678 0 : ae_int_t mlpntotal(multilayerperceptron* network, ae_state *_state)
21679 : {
21680 : ae_int_t result;
21681 :
21682 :
21683 0 : result = network->structinfo.ptr.p_int[3];
21684 0 : return result;
21685 : }
21686 :
21687 :
21688 : /*************************************************************************
21689 : Returns number of inputs.
21690 :
21691 : -- ALGLIB --
21692 : Copyright 19.10.2011 by Bochkanov Sergey
21693 : *************************************************************************/
21694 0 : ae_int_t mlpgetinputscount(multilayerperceptron* network,
21695 : ae_state *_state)
21696 : {
21697 : ae_int_t result;
21698 :
21699 :
21700 0 : result = network->structinfo.ptr.p_int[1];
21701 0 : return result;
21702 : }
21703 :
21704 :
21705 : /*************************************************************************
21706 : Returns number of outputs.
21707 :
21708 : -- ALGLIB --
21709 : Copyright 19.10.2011 by Bochkanov Sergey
21710 : *************************************************************************/
21711 0 : ae_int_t mlpgetoutputscount(multilayerperceptron* network,
21712 : ae_state *_state)
21713 : {
21714 : ae_int_t result;
21715 :
21716 :
21717 0 : result = network->structinfo.ptr.p_int[2];
21718 0 : return result;
21719 : }
21720 :
21721 :
21722 : /*************************************************************************
21723 : Returns number of weights.
21724 :
21725 : -- ALGLIB --
21726 : Copyright 19.10.2011 by Bochkanov Sergey
21727 : *************************************************************************/
21728 0 : ae_int_t mlpgetweightscount(multilayerperceptron* network,
21729 : ae_state *_state)
21730 : {
21731 : ae_int_t result;
21732 :
21733 :
21734 0 : result = network->structinfo.ptr.p_int[4];
21735 0 : return result;
21736 : }
21737 :
21738 :
21739 : /*************************************************************************
21740 : Tells whether the network is SOFTMAX-normalized (i.e. a classifier) or not.
21741 :
21742 : -- ALGLIB --
21743 : Copyright 04.11.2007 by Bochkanov Sergey
21744 : *************************************************************************/
21745 0 : ae_bool mlpissoftmax(multilayerperceptron* network, ae_state *_state)
21746 : {
21747 : ae_bool result;
21748 :
21749 :
21750 0 : result = network->structinfo.ptr.p_int[6]==1;
21751 0 : return result;
21752 : }
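/*
 * Usage sketch (illustrative only): querying basic network properties:
 *
 *     ae_int_t nin, nout, wcount;
 *     mlpproperties(&net, &nin, &nout, &wcount, &st);
 *     if( mlpissoftmax(&net, &st) )
 *     {
 *         // classifier: outputs are posterior probabilities
 *     }
 */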
21753 :
21754 :
21755 : /*************************************************************************
21756 : This function returns total number of layers (including input, hidden and
21757 : output layers).
21758 :
21759 : -- ALGLIB --
21760 : Copyright 25.03.2011 by Bochkanov Sergey
21761 : *************************************************************************/
21762 0 : ae_int_t mlpgetlayerscount(multilayerperceptron* network,
21763 : ae_state *_state)
21764 : {
21765 : ae_int_t result;
21766 :
21767 :
21768 0 : result = network->hllayersizes.cnt;
21769 0 : return result;
21770 : }
21771 :
21772 :
21773 : /*************************************************************************
21774 : This function returns size of K-th layer.
21775 :
21776 : K=0 corresponds to input layer, K=CNT-1 corresponds to output layer.
21777 :
21778 : Size of the output layer is always equal to the number of outputs; in a
21779 : softmax-normalized network, however, the last neuron doesn't have any
21780 : connections - it is just zero.
21781 :
21782 : -- ALGLIB --
21783 : Copyright 25.03.2011 by Bochkanov Sergey
21784 : *************************************************************************/
21785 0 : ae_int_t mlpgetlayersize(multilayerperceptron* network,
21786 : ae_int_t k,
21787 : ae_state *_state)
21788 : {
21789 : ae_int_t result;
21790 :
21791 :
21792 0 : ae_assert(k>=0&&k<network->hllayersizes.cnt, "MLPGetLayerSize: incorrect layer index", _state);
21793 0 : result = network->hllayersizes.ptr.p_int[k];
21794 0 : return result;
21795 : }
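/*
 * Usage sketch (illustrative only): enumerating layers, input layer first:
 *
 *     ae_int_t k;
 *     for(k=0; k<mlpgetlayerscount(&net, &st); k++)
 *         printf("layer %2d: %d neurons\n", (int)k, (int)mlpgetlayersize(&net, k, &st));
 */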
21796 :
21797 :
21798 : /*************************************************************************
21799 : This function returns offset/scaling coefficients for I-th input of the
21800 : network.
21801 :
21802 : INPUT PARAMETERS:
21803 : Network - network
21804 : I - input index
21805 :
21806 : OUTPUT PARAMETERS:
21807 : Mean - mean term
21808 : Sigma - sigma term, guaranteed to be nonzero.
21809 :
21810 : I-th input is passed through linear transformation
21811 : IN[i] = (IN[i]-Mean)/Sigma
21812 : before feeding to the network
21813 :
21814 : -- ALGLIB --
21815 : Copyright 25.03.2011 by Bochkanov Sergey
21816 : *************************************************************************/
21817 0 : void mlpgetinputscaling(multilayerperceptron* network,
21818 : ae_int_t i,
21819 : double* mean,
21820 : double* sigma,
21821 : ae_state *_state)
21822 : {
21823 :
21824 0 : *mean = 0;
21825 0 : *sigma = 0;
21826 :
21827 0 : ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[0], "MLPGetInputScaling: incorrect (nonexistent) I", _state);
21828 0 : *mean = network->columnmeans.ptr.p_double[i];
21829 0 : *sigma = network->columnsigmas.ptr.p_double[i];
21830 0 : if( ae_fp_eq(*sigma,(double)(0)) )
21831 : {
21832 0 : *sigma = (double)(1);
21833 : }
21834 0 : }
21835 :
21836 :
21837 : /*************************************************************************
21838 : This function returns offset/scaling coefficients for I-th output of the
21839 : network.
21840 :
21841 : INPUT PARAMETERS:
21842 : Network - network
21843 : I - output index
21844 :
21845 : OUTPUT PARAMETERS:
21846 : Mean - mean term
21847 : Sigma - sigma term, guaranteed to be nonzero.
21848 :
21849 : I-th output is passed through linear transformation
21850 : OUT[i] = OUT[i]*Sigma+Mean
21851 : before returning it to the user. In case of a SOFTMAX-normalized network,
21852 : we return (Mean,Sigma)=(0.0,1.0).
21853 :
21854 : -- ALGLIB --
21855 : Copyright 25.03.2011 by Bochkanov Sergey
21856 : *************************************************************************/
21857 0 : void mlpgetoutputscaling(multilayerperceptron* network,
21858 : ae_int_t i,
21859 : double* mean,
21860 : double* sigma,
21861 : ae_state *_state)
21862 : {
21863 :
21864 0 : *mean = 0;
21865 0 : *sigma = 0;
21866 :
21867 0 : ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1], "MLPGetOutputScaling: incorrect (nonexistent) I", _state);
21868 0 : if( network->structinfo.ptr.p_int[6]==1 )
21869 : {
21870 0 : *mean = (double)(0);
21871 0 : *sigma = (double)(1);
21872 : }
21873 : else
21874 : {
21875 0 : *mean = network->columnmeans.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i];
21876 0 : *sigma = network->columnsigmas.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i];
21877 : }
21878 0 : }
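/*
 * Usage sketch (illustrative only): reading the linear transformations which
 * are applied to the 0-th input and the 0-th output:
 *
 *     double mean, sigma;
 *     mlpgetinputscaling(&net, 0, &mean, &sigma, &st);   // IN = (IN-mean)/sigma
 *     mlpgetoutputscaling(&net, 0, &mean, &sigma, &st);  // OUT = OUT*sigma+mean
 */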
21879 :
21880 :
21881 : /*************************************************************************
21882 : This function returns information about Ith neuron of Kth layer
21883 :
21884 : INPUT PARAMETERS:
21885 : Network - network
21886 : K - layer index
21887 : I - neuron index (within layer)
21888 :
21889 : OUTPUT PARAMETERS:
21890 : FKind - activation function type (used by MLPActivationFunction())
21891 : this value is zero for input or linear neurons
21892 : Threshold - also called offset or bias;
21893 : zero for input neurons
21894 :
21895 : NOTE: this function throws an exception if the layer or neuron with the
21896 : given index does not exist.
21897 :
21898 : -- ALGLIB --
21899 : Copyright 25.03.2011 by Bochkanov Sergey
21900 : *************************************************************************/
21901 0 : void mlpgetneuroninfo(multilayerperceptron* network,
21902 : ae_int_t k,
21903 : ae_int_t i,
21904 : ae_int_t* fkind,
21905 : double* threshold,
21906 : ae_state *_state)
21907 : {
21908 : ae_int_t ncnt;
21909 : ae_int_t istart;
21910 : ae_int_t highlevelidx;
21911 : ae_int_t activationoffset;
21912 :
21913 0 : *fkind = 0;
21914 0 : *threshold = 0;
21915 :
21916 0 : ncnt = network->hlneurons.cnt/mlpbase_hlnfieldwidth;
21917 0 : istart = network->structinfo.ptr.p_int[5];
21918 :
21919 : /*
21920 : * search
21921 : */
21922 0 : network->integerbuf.ptr.p_int[0] = k;
21923 0 : network->integerbuf.ptr.p_int[1] = i;
21924 0 : highlevelidx = recsearch(&network->hlneurons, mlpbase_hlnfieldwidth, 2, 0, ncnt, &network->integerbuf, _state);
21925 0 : ae_assert(highlevelidx>=0, "MLPGetNeuronInfo: incorrect (nonexistent) layer or neuron index", _state);
21926 :
21927 : /*
21928 : * find offset of the activation function record in the StructInfo array
21929 : */
21930 0 : if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]>=0 )
21931 : {
21932 0 : activationoffset = istart+network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]*mlpbase_nfieldwidth;
21933 0 : *fkind = network->structinfo.ptr.p_int[activationoffset+0];
21934 : }
21935 : else
21936 : {
21937 0 : *fkind = 0;
21938 : }
21939 0 : if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]>=0 )
21940 : {
21941 0 : *threshold = network->weights.ptr.p_double[network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]];
21942 : }
21943 : else
21944 : {
21945 0 : *threshold = (double)(0);
21946 : }
21947 0 : }
21948 :
21949 :
21950 : /*************************************************************************
21951 : This function returns information about connection from I0-th neuron of
21952 : K0-th layer to I1-th neuron of K1-th layer.
21953 :
21954 : INPUT PARAMETERS:
21955 : Network - network
21956 : K0 - layer index
21957 : I0 - neuron index (within layer)
21958 : K1 - layer index
21959 : I1 - neuron index (within layer)
21960 :
21961 : RESULT:
21962 : connection weight (zero for non-existent connections)
21963 :
21964 : This function:
21965 : 1. throws exception if layer or neuron with given index does not exist.
21966 : 2. returns zero if neurons exist, but there is no connection between them
21967 :
21968 : -- ALGLIB --
21969 : Copyright 25.03.2011 by Bochkanov Sergey
21970 : *************************************************************************/
21971 0 : double mlpgetweight(multilayerperceptron* network,
21972 : ae_int_t k0,
21973 : ae_int_t i0,
21974 : ae_int_t k1,
21975 : ae_int_t i1,
21976 : ae_state *_state)
21977 : {
21978 : ae_int_t ccnt;
21979 : ae_int_t highlevelidx;
21980 : double result;
21981 :
21982 :
21983 0 : ccnt = network->hlconnections.cnt/mlpbase_hlconnfieldwidth;
21984 :
21985 : /*
21986 : * check params
21987 : */
21988 0 : ae_assert(k0>=0&&k0<network->hllayersizes.cnt, "MLPGetWeight: incorrect (nonexistent) K0", _state);
21989 0 : ae_assert(i0>=0&&i0<network->hllayersizes.ptr.p_int[k0], "MLPGetWeight: incorrect (nonexistent) I0", _state);
21990 0 : ae_assert(k1>=0&&k1<network->hllayersizes.cnt, "MLPGetWeight: incorrect (nonexistent) K1", _state);
21991 0 : ae_assert(i1>=0&&i1<network->hllayersizes.ptr.p_int[k1], "MLPGetWeight: incorrect (nonexistent) I1", _state);
21992 :
21993 : /*
21994 : * search
21995 : */
21996 0 : network->integerbuf.ptr.p_int[0] = k0;
21997 0 : network->integerbuf.ptr.p_int[1] = i0;
21998 0 : network->integerbuf.ptr.p_int[2] = k1;
21999 0 : network->integerbuf.ptr.p_int[3] = i1;
22000 0 : highlevelidx = recsearch(&network->hlconnections, mlpbase_hlconnfieldwidth, 4, 0, ccnt, &network->integerbuf, _state);
22001 0 : if( highlevelidx>=0 )
22002 : {
22003 0 : result = network->weights.ptr.p_double[network->hlconnections.ptr.p_int[highlevelidx*mlpbase_hlconnfieldwidth+4]];
22004 : }
22005 : else
22006 : {
22007 0 : result = (double)(0);
22008 : }
22009 0 : return result;
22010 : }
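
/*************************************************************************
Example (editorial sketch, not part of the library sources): walk the
network structure with the introspection functions above. It assumes the
companion wrappers alglib::mlpgetlayerscount and alglib::mlpgetlayersize
from the same mlpbase interface.
*************************************************************************/
#include "dataanalysis.h"
#include <cstdio>

int main()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 3, 1, net);
    for(alglib::ae_int_t k=0; k<alglib::mlpgetlayerscount(net); k++)
        for(alglib::ae_int_t i=0; i<alglib::mlpgetlayersize(net, k); i++)
        {
            alglib::ae_int_t fkind;
            double threshold;
            alglib::mlpgetneuroninfo(net, k, i, fkind, threshold);
            printf("layer %d, neuron %d: fkind=%d, threshold=%g\n",
                   (int)k, (int)i, (int)fkind, threshold);
        }
    // Weight of the connection from neuron 0 of layer 0 to neuron 1 of
    // layer 1; zero is returned when both neurons exist but are unconnected.
    printf("w = %g\n", alglib::mlpgetweight(net, 0, 0, 1, 1));
    return 0;
}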
22011 :
22012 :
22013 : /*************************************************************************
22014 : This function sets offset/scaling coefficients for I-th input of the
22015 : network.
22016 :
22017 : INPUT PARAMETERS:
22018 : Network - network
22019 : I - input index
22020 : Mean - mean term
22021 : Sigma - sigma term (if zero, will be replaced by 1.0)
22022 :
22023 : NOTE: I-th input is passed through linear transformation
22024 : IN[i] = (IN[i]-Mean)/Sigma
22025 : before feeding to the network. This function sets Mean and Sigma.
22026 :
22027 : -- ALGLIB --
22028 : Copyright 25.03.2011 by Bochkanov Sergey
22029 : *************************************************************************/
22030 0 : void mlpsetinputscaling(multilayerperceptron* network,
22031 : ae_int_t i,
22032 : double mean,
22033 : double sigma,
22034 : ae_state *_state)
22035 : {
22036 :
22037 :
22038 0 : ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[0], "MLPSetInputScaling: incorrect (nonexistent) I", _state);
22039 0 : ae_assert(ae_isfinite(mean, _state), "MLPSetInputScaling: infinite or NAN Mean", _state);
22040 0 : ae_assert(ae_isfinite(sigma, _state), "MLPSetInputScaling: infinite or NAN Sigma", _state);
22041 0 : if( ae_fp_eq(sigma,(double)(0)) )
22042 : {
22043 0 : sigma = (double)(1);
22044 : }
22045 0 : network->columnmeans.ptr.p_double[i] = mean;
22046 0 : network->columnsigmas.ptr.p_double[i] = sigma;
22047 0 : }
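
/*************************************************************************
Example (editorial sketch, not part of the library sources): a common use
of the setter above is standardizing an input from dataset statistics, so
the network sees zero-mean/unit-variance values. The helper name below is
hypothetical.
*************************************************************************/
#include "dataanalysis.h"
#include <cmath>

// standardize_input: hypothetical helper, editorial example only
static void standardize_input(alglib::multilayerperceptron &net,
                              alglib::ae_int_t i,
                              const double *col, int n)
{
    double mean = 0.0, var = 0.0;
    for(int j=0; j<n; j++)
        mean += col[j];
    mean /= n;
    for(int j=0; j<n; j++)
        var += (col[j]-mean)*(col[j]-mean);
    double sigma = std::sqrt(var/n);
    alglib::mlpsetinputscaling(net, i, mean, sigma);  // Sigma==0 is replaced by 1.0
}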
22048 :
22049 :
22050 : /*************************************************************************
22051 : This function sets offset/scaling coefficients for I-th output of the
22052 : network.
22053 :
22054 : INPUT PARAMETERS:
22055 : Network - network
22056 : I - output index
22057 : Mean - mean term
22058 : Sigma - sigma term (if zero, will be replaced by 1.0)
22059 :
22062 : NOTE: I-th output is passed through linear transformation
22063 : OUT[i] = OUT[i]*Sigma+Mean
22064 : before returning it to the user. This function sets Sigma/Mean. In case we
22065 : have a SOFTMAX-normalized network, you can not set (Mean,Sigma) to anything
22066 : other than (0.0,1.0) - this function will throw an exception.
22067 :
22068 : -- ALGLIB --
22069 : Copyright 25.03.2011 by Bochkanov Sergey
22070 : *************************************************************************/
22071 0 : void mlpsetoutputscaling(multilayerperceptron* network,
22072 : ae_int_t i,
22073 : double mean,
22074 : double sigma,
22075 : ae_state *_state)
22076 : {
22077 :
22078 :
22079 0 : ae_assert(i>=0&&i<network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1], "MLPSetOutputScaling: incorrect (nonexistent) I", _state);
22080 0 : ae_assert(ae_isfinite(mean, _state), "MLPSetOutputScaling: infinite or NAN Mean", _state);
22081 0 : ae_assert(ae_isfinite(sigma, _state), "MLPSetOutputScaling: infinite or NAN Sigma", _state);
22082 0 : if( network->structinfo.ptr.p_int[6]==1 )
22083 : {
22084 0 : ae_assert(ae_fp_eq(mean,(double)(0)), "MLPSetOutputScaling: you can not set non-zero Mean term for classifier network", _state);
22085 0 : ae_assert(ae_fp_eq(sigma,(double)(1)), "MLPSetOutputScaling: you can not set non-unit Sigma term for classifier network", _state);
22086 : }
22087 : else
22088 : {
22089 0 : if( ae_fp_eq(sigma,(double)(0)) )
22090 : {
22091 0 : sigma = (double)(1);
22092 : }
22093 0 : network->columnmeans.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i] = mean;
22094 0 : network->columnsigmas.ptr.p_double[network->hllayersizes.ptr.p_int[0]+i] = sigma;
22095 : }
22096 0 : }
22097 :
22098 :
22099 : /*************************************************************************
22100 : This function modifies information about Ith neuron of Kth layer
22101 :
22102 : INPUT PARAMETERS:
22103 : Network - network
22104 : K - layer index
22105 : I - neuron index (within layer)
22106 : FKind - activation function type (used by MLPActivationFunction())
22107 : this value must be zero for input neurons
22108 : (you can not set activation function for input neurons)
22109 : Threshold - also called offset, bias
22110 : this value must be zero for input neurons
22111 : (you can not set threshold for input neurons)
22112 :
22113 : NOTES:
22114 : 1. this function throws exception if layer or neuron with given index does
22115 : not exist.
22116 : 2. this function also throws exception when you try to set non-linear
22117 : activation function for input neurons (any kind of network) or for output
22118 : neurons of classifier network.
22119 : 3. this function throws exception when you try to set non-zero threshold for
22120 : input neurons (any kind of network).
22121 :
22122 : -- ALGLIB --
22123 : Copyright 25.03.2011 by Bochkanov Sergey
22124 : *************************************************************************/
22125 0 : void mlpsetneuroninfo(multilayerperceptron* network,
22126 : ae_int_t k,
22127 : ae_int_t i,
22128 : ae_int_t fkind,
22129 : double threshold,
22130 : ae_state *_state)
22131 : {
22132 : ae_int_t ncnt;
22133 : ae_int_t istart;
22134 : ae_int_t highlevelidx;
22135 : ae_int_t activationoffset;
22136 :
22137 :
22138 0 : ae_assert(ae_isfinite(threshold, _state), "MLPSetNeuronInfo: infinite or NAN Threshold", _state);
22139 :
22140 : /*
22141 : * convenience vars
22142 : */
22143 0 : ncnt = network->hlneurons.cnt/mlpbase_hlnfieldwidth;
22144 0 : istart = network->structinfo.ptr.p_int[5];
22145 :
22146 : /*
22147 : * search
22148 : */
22149 0 : network->integerbuf.ptr.p_int[0] = k;
22150 0 : network->integerbuf.ptr.p_int[1] = i;
22151 0 : highlevelidx = recsearch(&network->hlneurons, mlpbase_hlnfieldwidth, 2, 0, ncnt, &network->integerbuf, _state);
22152 0 : ae_assert(highlevelidx>=0, "MLPSetNeuronInfo: incorrect (nonexistent) layer or neuron index", _state);
22153 :
22154 : /*
22155 : * activation function
22156 : */
22157 0 : if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]>=0 )
22158 : {
22159 0 : activationoffset = istart+network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+2]*mlpbase_nfieldwidth;
22160 0 : network->structinfo.ptr.p_int[activationoffset+0] = fkind;
22161 : }
22162 : else
22163 : {
22164 0 : ae_assert(fkind==0, "MLPSetNeuronInfo: you try to set activation function for neuron which can not have one", _state);
22165 : }
22166 :
22167 : /*
22168 : * Threshold
22169 : */
22170 0 : if( network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]>=0 )
22171 : {
22172 0 : network->weights.ptr.p_double[network->hlneurons.ptr.p_int[highlevelidx*mlpbase_hlnfieldwidth+3]] = threshold;
22173 : }
22174 : else
22175 : {
22176 0 : ae_assert(ae_fp_eq(threshold,(double)(0)), "MLPSetNeuronInfo: you try to set non-zero threshold for neuron which can not have one", _state);
22177 : }
22178 0 : }
22179 :
22180 :
22181 : /*************************************************************************
22182 : This function modifies information about connection from I0-th neuron of
22183 : K0-th layer to I1-th neuron of K1-th layer.
22184 :
22185 : INPUT PARAMETERS:
22186 : Network - network
22187 : K0 - layer index
22188 : I0 - neuron index (within layer)
22189 : K1 - layer index
22190 : I1 - neuron index (within layer)
22191 : W - connection weight (must be zero for non-existent
22192 : connections)
22193 :
22194 : This function:
22195 : 1. throws exception if layer or neuron with given index does not exist.
22196 : 2. throws exception if you try to set non-zero weight for non-existent
22197 : connection
22198 :
22199 : -- ALGLIB --
22200 : Copyright 25.03.2011 by Bochkanov Sergey
22201 : *************************************************************************/
22202 0 : void mlpsetweight(multilayerperceptron* network,
22203 : ae_int_t k0,
22204 : ae_int_t i0,
22205 : ae_int_t k1,
22206 : ae_int_t i1,
22207 : double w,
22208 : ae_state *_state)
22209 : {
22210 : ae_int_t ccnt;
22211 : ae_int_t highlevelidx;
22212 :
22213 :
22214 0 : ccnt = network->hlconnections.cnt/mlpbase_hlconnfieldwidth;
22215 :
22216 : /*
22217 : * check params
22218 : */
22219 0 : ae_assert(k0>=0&&k0<network->hllayersizes.cnt, "MLPSetWeight: incorrect (nonexistent) K0", _state);
22220 0 : ae_assert(i0>=0&&i0<network->hllayersizes.ptr.p_int[k0], "MLPSetWeight: incorrect (nonexistent) I0", _state);
22221 0 : ae_assert(k1>=0&&k1<network->hllayersizes.cnt, "MLPSetWeight: incorrect (nonexistent) K1", _state);
22222 0 : ae_assert(i1>=0&&i1<network->hllayersizes.ptr.p_int[k1], "MLPSetWeight: incorrect (nonexistent) I1", _state);
22223 0 : ae_assert(ae_isfinite(w, _state), "MLPSetWeight: infinite or NAN weight", _state);
22224 :
22225 : /*
22226 : * search
22227 : */
22228 0 : network->integerbuf.ptr.p_int[0] = k0;
22229 0 : network->integerbuf.ptr.p_int[1] = i0;
22230 0 : network->integerbuf.ptr.p_int[2] = k1;
22231 0 : network->integerbuf.ptr.p_int[3] = i1;
22232 0 : highlevelidx = recsearch(&network->hlconnections, mlpbase_hlconnfieldwidth, 4, 0, ccnt, &network->integerbuf, _state);
22233 0 : if( highlevelidx>=0 )
22234 : {
22235 0 : network->weights.ptr.p_double[network->hlconnections.ptr.p_int[highlevelidx*mlpbase_hlconnfieldwidth+4]] = w;
22236 : }
22237 : else
22238 : {
22239 0 : ae_assert(ae_fp_eq(w,(double)(0)), "MLPSetWeight: you try to set non-zero weight for non-existent connection", _state);
22240 : }
22241 0 : }
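
/*************************************************************************
Example (editorial sketch, not part of the library sources): the setters
above allow manual network surgery - here a single connection weight is
written and read back through the symmetric getter, and one hidden neuron
is reconfigured.
*************************************************************************/
#include "dataanalysis.h"
#include <cassert>

int main()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 3, 1, net);
    alglib::mlpsetweight(net, 0, 0, 1, 1, 0.25);       // layer 0/neuron 0 -> layer 1/neuron 1
    assert(alglib::mlpgetweight(net, 0, 0, 1, 1)==0.25);
    alglib::mlpsetneuroninfo(net, 1, 0, 1, 0.5);       // fkind=1 (TanH), threshold=0.5
    return 0;
}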
22242 :
22243 :
22244 : /*************************************************************************
22245 : Neural network activation function
22246 :
22247 : INPUT PARAMETERS:
22248 : NET - neuron input
22249 : K - function index (zero for linear function)
22250 :
22251 : OUTPUT PARAMETERS:
22252 : F - function
22253 : DF - its derivative
22254 : D2F - its second derivative
22255 :
22256 : -- ALGLIB --
22257 : Copyright 04.11.2007 by Bochkanov Sergey
22258 : *************************************************************************/
22259 0 : void mlpactivationfunction(double net,
22260 : ae_int_t k,
22261 : double* f,
22262 : double* df,
22263 : double* d2f,
22264 : ae_state *_state)
22265 : {
22266 : double net2;
22267 : double arg;
22268 : double root;
22269 : double r;
22270 :
22271 0 : *f = 0;
22272 0 : *df = 0;
22273 0 : *d2f = 0;
22274 :
22275 0 : if( k==0||k==-5 )
22276 : {
22277 0 : *f = net;
22278 0 : *df = (double)(1);
22279 0 : *d2f = (double)(0);
22280 0 : return;
22281 : }
22282 0 : if( k==1 )
22283 : {
22284 :
22285 : /*
22286 : * TanH activation function
22287 : */
22288 0 : if( ae_fp_less(ae_fabs(net, _state),(double)(100)) )
22289 : {
22290 0 : *f = ae_tanh(net, _state);
22291 : }
22292 : else
22293 : {
22294 0 : *f = (double)(ae_sign(net, _state));
22295 : }
22296 0 : *df = 1-*f*(*f);
22297 0 : *d2f = -2*(*f)*(*df);
22298 0 : return;
22299 : }
22300 0 : if( k==3 )
22301 : {
22302 :
22303 : /*
22304 : * EX activation function
22305 : */
22306 0 : if( ae_fp_greater_eq(net,(double)(0)) )
22307 : {
22308 0 : net2 = net*net;
22309 0 : arg = net2+1;
22310 0 : root = ae_sqrt(arg, _state);
22311 0 : *f = net+root;
22312 0 : r = net/root;
22313 0 : *df = 1+r;
22314 0 : *d2f = (root-net*r)/arg;
22315 : }
22316 : else
22317 : {
22318 0 : *f = ae_exp(net, _state);
22319 0 : *df = *f;
22320 0 : *d2f = *f;
22321 : }
22322 0 : return;
22323 : }
22324 0 : if( k==2 )
22325 : {
22326 0 : *f = ae_exp(-ae_sqr(net, _state), _state);
22327 0 : *df = -2*net*(*f);
22328 0 : *d2f = -2*(*f+*df*net);
22329 0 : return;
22330 : }
22331 0 : *f = (double)(0);
22332 0 : *df = (double)(0);
22333 0 : *d2f = (double)(0);
22334 : }
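
/*************************************************************************
Example (editorial sketch, not part of the library sources): checking the
K=1 (TanH) branch of the function above - F=tanh(NET), DF=1-F^2 and
D2F=-2*F*DF - through the public alglib::mlpactivationfunction wrapper.
*************************************************************************/
#include "dataanalysis.h"
#include <cmath>
#include <cstdio>

int main()
{
    double f, df, d2f;
    alglib::mlpactivationfunction(0.5, 1, f, df, d2f);
    printf("f  =%g  (tanh(0.5) =%g)\n", f,   std::tanh(0.5));
    printf("df =%g  (1-f^2     =%g)\n", df,  1-f*f);
    printf("d2f=%g  (-2*f*df   =%g)\n", d2f, -2*f*df);
    return 0;
}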
22335 :
22336 :
22337 : /*************************************************************************
22338 : Processing
22339 :
22340 : INPUT PARAMETERS:
22341 : Network - neural network
22342 : X - input vector, array[0..NIn-1].
22343 :
22344 : OUTPUT PARAMETERS:
22345 : Y - result. Regression estimate when solving regression task,
22346 : vector of posterior probabilities for classification task.
22347 :
22348 : See also MLPProcessI
22349 :
22350 : -- ALGLIB --
22351 : Copyright 04.11.2007 by Bochkanov Sergey
22352 : *************************************************************************/
22353 0 : void mlpprocess(multilayerperceptron* network,
22354 : /* Real */ ae_vector* x,
22355 : /* Real */ ae_vector* y,
22356 : ae_state *_state)
22357 : {
22358 :
22359 :
22360 0 : if( y->cnt<network->structinfo.ptr.p_int[2] )
22361 : {
22362 0 : ae_vector_set_length(y, network->structinfo.ptr.p_int[2], _state);
22363 : }
22364 0 : mlpinternalprocessvector(&network->structinfo, &network->weights, &network->columnmeans, &network->columnsigmas, &network->neurons, &network->dfdnet, x, y, _state);
22365 0 : }
22366 :
22367 :
22368 : /*************************************************************************
22369 : 'interactive' variant of MLPProcess for languages like Python which
22370 : support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the
22371 : interpreter
22372 :
22373 : This function allocates new array on each call, so it is significantly
22374 : slower than its 'non-interactive' counterpart, but it is more convenient
22375 : when you call it from command line.
22376 :
22377 : -- ALGLIB --
22378 : Copyright 21.09.2010 by Bochkanov Sergey
22379 : *************************************************************************/
22380 0 : void mlpprocessi(multilayerperceptron* network,
22381 : /* Real */ ae_vector* x,
22382 : /* Real */ ae_vector* y,
22383 : ae_state *_state)
22384 : {
22385 :
22386 0 : ae_vector_clear(y);
22387 :
22388 0 : mlpprocess(network, x, y, _state);
22389 0 : }
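
/*************************************************************************
Example (editorial sketch, not part of the library sources): a complete
feed-forward pass through a freshly created (randomly initialized)
regression network, using the 'interactive' wrapper which allocates Y.
*************************************************************************/
#include "dataanalysis.h"
#include <cstdio>

int main()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(2, 3, 1, net);        // weights are randomly initialized
    alglib::real_1d_array x("[1.0,2.0]"), y;
    alglib::mlpprocessi(net, x, y);          // allocates y, then calls mlpprocess
    printf("y[0] = %g\n", y[0]);
    return 0;
}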
22390 :
22391 :
22392 : /*************************************************************************
22393 : Error of the neural network on dataset.
22394 :
22395 : ! COMMERCIAL EDITION OF ALGLIB:
22396 : !
22397 : ! Commercial Edition of ALGLIB includes following important improvements
22398 : ! of this function:
22399 : ! * high-performance native backend with same C# interface (C# version)
22400 : ! * multithreading support (C++ and C# versions)
22401 : !
22402 : ! We recommend you to read 'Working with commercial version' section of
22403 : ! ALGLIB Reference Manual in order to find out how to use performance-
22404 : ! related features provided by commercial edition of ALGLIB.
22405 :
22406 : INPUT PARAMETERS:
22407 : Network - neural network;
22408 : XY - training set, see below for information on the
22409 : training set format;
22410 : NPoints - points count.
22411 :
22412 : RESULT:
22413 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
22414 :
22415 : DATASET FORMAT:
22416 :
22417 : This function uses two different dataset formats - one for regression
22418 : networks, another one for classification networks.
22419 :
22420 : For regression networks with NIn inputs and NOut outputs following dataset
22421 : format is used:
22422 : * dataset is given by NPoints*(NIn+NOut) matrix
22423 : * each row corresponds to one example
22424 : * first NIn columns are inputs, next NOut columns are outputs
22425 :
22426 : For classification networks with NIn inputs and NClasses classes following
22427 : dataset format is used:
22428 : * dataset is given by NPoints*(NIn+1) matrix
22429 : * each row corresponds to one example
22430 : * first NIn columns are inputs, last column stores class number (from 0 to
22431 : NClasses-1).
22432 :
22433 : -- ALGLIB --
22434 : Copyright 04.11.2007 by Bochkanov Sergey
22435 : *************************************************************************/
22436 0 : double mlperror(multilayerperceptron* network,
22437 : /* Real */ ae_matrix* xy,
22438 : ae_int_t npoints,
22439 : ae_state *_state)
22440 : {
22441 : double result;
22442 :
22443 :
22444 0 : ae_assert(xy->rows>=npoints, "MLPError: XY has less than NPoints rows", _state);
22445 0 : if( npoints>0 )
22446 : {
22447 0 : if( mlpissoftmax(network, _state) )
22448 : {
22449 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPError: XY has less than NIn+1 columns", _state);
22450 : }
22451 : else
22452 : {
22453 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPError: XY has less than NIn+NOut columns", _state);
22454 : }
22455 : }
22456 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22457 0 : result = ae_sqr(network->err.rmserror, _state)*npoints*mlpgetoutputscount(network, _state)/2;
22458 0 : return result;
22459 : }
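
/*************************************************************************
Example (editorial sketch, not part of the library sources): for a
regression network, the value returned by mlperror can be reproduced as
SUM(sqr(y[i]-desired_y[i]))/2 with explicit mlpprocessi calls.
*************************************************************************/
#include "dataanalysis.h"
#include <cstdio>

int main()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(1, 3, 1, net);
    alglib::real_2d_array xy("[[0.0,0.5],[1.0,-0.5]]");   // rows: [input, desired output]
    double e = alglib::mlperror(net, xy, 2);

    double sse = 0.0;
    alglib::real_1d_array x, y;
    x.setlength(1);
    for(int i=0; i<2; i++)
    {
        x[0] = xy[i][0];
        alglib::mlpprocessi(net, x, y);
        sse += (y[0]-xy[i][1])*(y[0]-xy[i][1]);
    }
    printf("mlperror=%g, manual SSE/2=%g\n", e, sse/2);   // should agree
    return 0;
}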
22460 :
22461 :
22462 : /*************************************************************************
22463 : Error of the neural network on dataset given by sparse matrix.
22464 :
22465 : ! COMMERCIAL EDITION OF ALGLIB:
22466 : !
22467 : ! Commercial Edition of ALGLIB includes following important improvements
22468 : ! of this function:
22469 : ! * high-performance native backend with same C# interface (C# version)
22470 : ! * multithreading support (C++ and C# versions)
22471 : !
22472 : ! We recommend you to read 'Working with commercial version' section of
22473 : ! ALGLIB Reference Manual in order to find out how to use performance-
22474 : ! related features provided by commercial edition of ALGLIB.
22475 :
22476 : INPUT PARAMETERS:
22477 : Network - neural network
22478 : XY - training set, see below for information on the
22479 : training set format. This function checks correctness
22480 : of the dataset (no NANs/INFs, class numbers are
22481 : correct) and throws exception when incorrect dataset
22482 : is passed. Sparse matrix must use CRS format for
22483 : storage.
22484 : NPoints - points count, >=0
22485 :
22486 : RESULT:
22487 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
22488 :
22489 : DATASET FORMAT:
22490 :
22491 : This function uses two different dataset formats - one for regression
22492 : networks, another one for classification networks.
22493 :
22494 : For regression networks with NIn inputs and NOut outputs following dataset
22495 : format is used:
22496 : * dataset is given by NPoints*(NIn+NOut) matrix
22497 : * each row corresponds to one example
22498 : * first NIn columns are inputs, next NOut columns are outputs
22499 :
22500 : For classification networks with NIn inputs and NClasses classes following
22501 : dataset format is used:
22502 : * dataset is given by NPoints*(NIn+1) matrix
22503 : * each row corresponds to one example
22504 : * first NIn columns are inputs, last column stores class number (from 0 to
22505 : NClasses-1).
22506 :
22507 : -- ALGLIB --
22508 : Copyright 23.07.2012 by Bochkanov Sergey
22509 : *************************************************************************/
22510 0 : double mlperrorsparse(multilayerperceptron* network,
22511 : sparsematrix* xy,
22512 : ae_int_t npoints,
22513 : ae_state *_state)
22514 : {
22515 : double result;
22516 :
22517 :
22518 0 : ae_assert(sparseiscrs(xy, _state), "MLPErrorSparse: XY is not in CRS format.", _state);
22519 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPErrorSparse: XY has less than NPoints rows", _state);
22520 0 : if( npoints>0 )
22521 : {
22522 0 : if( mlpissoftmax(network, _state) )
22523 : {
22524 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPErrorSparse: XY has less than NIn+1 columns", _state);
22525 : }
22526 : else
22527 : {
22528 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSparse: XY has less than NIn+NOut columns", _state);
22529 : }
22530 : }
22531 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22532 0 : result = ae_sqr(network->err.rmserror, _state)*npoints*mlpgetoutputscount(network, _state)/2;
22533 0 : return result;
22534 : }
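
/*************************************************************************
Example (editorial sketch, not part of the library sources): the sparse
error functions require CRS storage, so a dataset assembled in the default
hash-table representation must be converted first (sparsecreate, sparseset
and sparseconverttocrs are assumed from the ALGLIB linear algebra
interface).
*************************************************************************/
#include "dataanalysis.h"
#include <cstdio>

int main()
{
    alglib::multilayerperceptron net;
    alglib::mlpcreate1(1, 3, 1, net);

    alglib::sparsematrix xy;                  // 2 points, NIn+NOut=2 columns
    alglib::sparsecreate(2, 2, xy);
    alglib::sparseset(xy, 0, 0, 0.0);
    alglib::sparseset(xy, 0, 1, 0.5);
    alglib::sparseset(xy, 1, 0, 1.0);
    alglib::sparseset(xy, 1, 1, -0.5);
    alglib::sparseconverttocrs(xy);           // mandatory before mlperrorsparse

    printf("error = %g\n", alglib::mlperrorsparse(net, xy, 2));
    return 0;
}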
22535 :
22536 :
22537 : /*************************************************************************
22538 : Natural error function for neural network, internal subroutine.
22539 :
22540 : NOTE: this function is single-threaded. Unlike other error functions, it
22541 : receives no speed-up from being executed in SMP mode.
22542 :
22543 : -- ALGLIB --
22544 : Copyright 04.11.2007 by Bochkanov Sergey
22545 : *************************************************************************/
22546 0 : double mlperrorn(multilayerperceptron* network,
22547 : /* Real */ ae_matrix* xy,
22548 : ae_int_t ssize,
22549 : ae_state *_state)
22550 : {
22551 : ae_int_t i;
22552 : ae_int_t k;
22553 : ae_int_t nin;
22554 : ae_int_t nout;
22555 : ae_int_t wcount;
22556 : double e;
22557 : double result;
22558 :
22559 :
22560 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
22561 0 : result = (double)(0);
22562 0 : for(i=0; i<=ssize-1; i++)
22563 : {
22564 :
22565 : /*
22566 : * Process vector
22567 : */
22568 0 : ae_v_move(&network->x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
22569 0 : mlpprocess(network, &network->x, &network->y, _state);
22570 :
22571 : /*
22572 : * Update error function
22573 : */
22574 0 : if( network->structinfo.ptr.p_int[6]==0 )
22575 : {
22576 :
22577 : /*
22578 : * Least squares error function
22579 : */
22580 0 : ae_v_sub(&network->y.ptr.p_double[0], 1, &xy->ptr.pp_double[i][nin], 1, ae_v_len(0,nout-1));
22581 0 : e = ae_v_dotproduct(&network->y.ptr.p_double[0], 1, &network->y.ptr.p_double[0], 1, ae_v_len(0,nout-1));
22582 0 : result = result+e/2;
22583 : }
22584 : else
22585 : {
22586 :
22587 : /*
22588 : * Cross-entropy error function
22589 : */
22590 0 : k = ae_round(xy->ptr.pp_double[i][nin], _state);
22591 0 : if( k>=0&&k<nout )
22592 : {
22593 0 : result = result+mlpbase_safecrossentropy((double)(1), network->y.ptr.p_double[k], _state);
22594 : }
22595 : }
22596 : }
22597 0 : return result;
22598 : }
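
/*************************************************************************
Worked example (editorial): for classifier networks the loop above
accumulates the negative log-likelihood of the training classes, i.e.
E = SUM_i -ln(y_i[class_i]), with SafeCrossEntropy guarding against
posteriors near zero. For two points with posteriors (0.7,0.2,0.1),
class 0, and (0.25,0.5,0.25), class 1:
    E = -ln(0.7) - ln(0.5) ~ 0.357 + 0.693 = 1.050
*************************************************************************/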
22599 :
22600 :
22601 : /*************************************************************************
22602 : Classification error of the neural network on dataset.
22603 :
22604 : ! COMMERCIAL EDITION OF ALGLIB:
22605 : !
22606 : ! Commercial Edition of ALGLIB includes following important improvements
22607 : ! of this function:
22608 : ! * high-performance native backend with same C# interface (C# version)
22609 : ! * multithreading support (C++ and C# versions)
22610 : !
22611 : ! We recommend you to read 'Working with commercial version' section of
22612 : ! ALGLIB Reference Manual in order to find out how to use performance-
22613 : ! related features provided by commercial edition of ALGLIB.
22614 :
22615 : INPUT PARAMETERS:
22616 : Network - neural network;
22617 : XY - training set, see below for information on the
22618 : training set format;
22619 : NPoints - points count.
22620 :
22621 : RESULT:
22622 : classification error (number of misclassified cases)
22623 :
22624 : DATASET FORMAT:
22625 :
22626 : This function uses two different dataset formats - one for regression
22627 : networks, another one for classification networks.
22628 :
22629 : For regression networks with NIn inputs and NOut outputs following dataset
22630 : format is used:
22631 : * dataset is given by NPoints*(NIn+NOut) matrix
22632 : * each row corresponds to one example
22633 : * first NIn columns are inputs, next NOut columns are outputs
22634 :
22635 : For classification networks with NIn inputs and NClasses classes following
22636 : dataset format is used:
22637 : * dataset is given by NPoints*(NIn+1) matrix
22638 : * each row corresponds to one example
22639 : * first NIn columns are inputs, last column stores class number (from 0 to
22640 : NClasses-1).
22641 :
22642 : -- ALGLIB --
22643 : Copyright 04.11.2007 by Bochkanov Sergey
22644 : *************************************************************************/
22645 0 : ae_int_t mlpclserror(multilayerperceptron* network,
22646 : /* Real */ ae_matrix* xy,
22647 : ae_int_t npoints,
22648 : ae_state *_state)
22649 : {
22650 : ae_int_t result;
22651 :
22652 :
22653 0 : ae_assert(xy->rows>=npoints, "MLPClsError: XY has less than NPoints rows", _state);
22654 0 : if( npoints>0 )
22655 : {
22656 0 : if( mlpissoftmax(network, _state) )
22657 : {
22658 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPClsError: XY has less than NIn+1 columns", _state);
22659 : }
22660 : else
22661 : {
22662 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPClsError: XY has less than NIn+NOut columns", _state);
22663 : }
22664 : }
22665 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22666 0 : result = ae_round(npoints*network->err.relclserror, _state);
22667 0 : return result;
22668 : }
22669 :
22670 :
22671 : /*************************************************************************
22672 : Relative classification error on the test set.
22673 :
22674 : ! COMMERCIAL EDITION OF ALGLIB:
22675 : !
22676 : ! Commercial Edition of ALGLIB includes following important improvements
22677 : ! of this function:
22678 : ! * high-performance native backend with same C# interface (C# version)
22679 : ! * multithreading support (C++ and C# versions)
22680 : !
22681 : ! We recommend you to read 'Working with commercial version' section of
22682 : ! ALGLIB Reference Manual in order to find out how to use performance-
22683 : ! related features provided by commercial edition of ALGLIB.
22684 :
22685 : INPUT PARAMETERS:
22686 : Network - neural network;
22687 : XY - training set, see below for information on the
22688 : training set format;
22689 : NPoints - points count.
22690 :
22691 : RESULT:
22692 : Fraction (in [0,1]) of incorrectly classified cases. Works both for classifier
22693 : networks and general purpose networks used as classifiers.
22694 :
22695 : DATASET FORMAT:
22696 :
22697 : This function uses two different dataset formats - one for regression
22698 : networks, another one for classification networks.
22699 :
22700 : For regression networks with NIn inputs and NOut outputs following dataset
22701 : format is used:
22702 : * dataset is given by NPoints*(NIn+NOut) matrix
22703 : * each row corresponds to one example
22704 : * first NIn columns are inputs, next NOut columns are outputs
22705 :
22706 : For classification networks with NIn inputs and NClasses classes following
22707 : dataset format is used:
22708 : * dataset is given by NPoints*(NIn+1) matrix
22709 : * each row corresponds to one example
22710 : * first NIn columns are inputs, last column stores class number (from 0 to
22711 : NClasses-1).
22712 :
22713 : -- ALGLIB --
22714 : Copyright 25.12.2008 by Bochkanov Sergey
22715 : *************************************************************************/
22716 0 : double mlprelclserror(multilayerperceptron* network,
22717 : /* Real */ ae_matrix* xy,
22718 : ae_int_t npoints,
22719 : ae_state *_state)
22720 : {
22721 : double result;
22722 :
22723 :
22724 0 : ae_assert(xy->rows>=npoints, "MLPRelClsError: XY has less than NPoints rows", _state);
22725 0 : if( npoints>0 )
22726 : {
22727 0 : if( mlpissoftmax(network, _state) )
22728 : {
22729 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPRelClsError: XY has less than NIn+1 columns", _state);
22730 : }
22731 : else
22732 : {
22733 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRelClsError: XY has less than NIn+NOut columns", _state);
22734 : }
22735 : }
22736 0 : if( npoints>0 )
22737 : {
22738 0 : result = (double)mlpclserror(network, xy, npoints, _state)/(double)npoints;
22739 : }
22740 : else
22741 : {
22742 0 : result = 0.0;
22743 : }
22744 0 : return result;
22745 : }
22746 :
22747 :
22748 : /*************************************************************************
22749 : Relative classification error on the test set given by sparse matrix.
22750 :
22751 : ! COMMERCIAL EDITION OF ALGLIB:
22752 : !
22753 : ! Commercial Edition of ALGLIB includes following important improvements
22754 : ! of this function:
22755 : ! * high-performance native backend with same C# interface (C# version)
22756 : ! * multithreading support (C++ and C# versions)
22757 : !
22758 : ! We recommend you to read 'Working with commercial version' section of
22759 : ! ALGLIB Reference Manual in order to find out how to use performance-
22760 : ! related features provided by commercial edition of ALGLIB.
22761 :
22762 : INPUT PARAMETERS:
22763 : Network - neural network;
22764 : XY - training set, see below for information on the
22765 : training set format. Sparse matrix must use CRS format
22766 : for storage.
22767 : NPoints - points count, >=0.
22768 :
22769 : RESULT:
22770 : Fraction (in [0,1]) of incorrectly classified cases. Works both for classifier
22771 : networks and general purpose networks used as classifiers.
22772 :
22773 : DATASET FORMAT:
22774 :
22775 : This function uses two different dataset formats - one for regression
22776 : networks, another one for classification networks.
22777 :
22778 : For regression networks with NIn inputs and NOut outputs following dataset
22779 : format is used:
22780 : * dataset is given by NPoints*(NIn+NOut) matrix
22781 : * each row corresponds to one example
22782 : * first NIn columns are inputs, next NOut columns are outputs
22783 :
22784 : For classification networks with NIn inputs and NClasses classes following
22785 : dataset format is used:
22786 : * dataset is given by NPoints*(NIn+1) matrix
22787 : * each row corresponds to one example
22788 : * first NIn columns are inputs, last column stores class number (from 0 to
22789 : NClasses-1).
22790 :
22791 : -- ALGLIB --
22792 : Copyright 09.08.2012 by Bochkanov Sergey
22793 : *************************************************************************/
22794 0 : double mlprelclserrorsparse(multilayerperceptron* network,
22795 : sparsematrix* xy,
22796 : ae_int_t npoints,
22797 : ae_state *_state)
22798 : {
22799 : double result;
22800 :
22801 :
22802 0 : ae_assert(sparseiscrs(xy, _state), "MLPRelClsErrorSparse: sparse matrix XY is not in CRS format.", _state);
22803 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPRelClsErrorSparse: sparse matrix XY has less than NPoints rows", _state);
22804 0 : if( npoints>0 )
22805 : {
22806 0 : if( mlpissoftmax(network, _state) )
22807 : {
22808 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPRelClsErrorSparse: sparse matrix XY has less than NIn+1 columns", _state);
22809 : }
22810 : else
22811 : {
22812 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRelClsErrorSparse: sparse matrix XY has less than NIn+NOut columns", _state);
22813 : }
22814 : }
22815 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22816 0 : result = network->err.relclserror;
22817 0 : return result;
22818 : }
22819 :
22820 :
22821 : /*************************************************************************
22822 : Average cross-entropy (in bits per element) on the test set.
22823 :
22824 : ! COMMERCIAL EDITION OF ALGLIB:
22825 : !
22826 : ! Commercial Edition of ALGLIB includes following important improvements
22827 : ! of this function:
22828 : ! * high-performance native backend with same C# interface (C# version)
22829 : ! * multithreading support (C++ and C# versions)
22830 : !
22831 : ! We recommend you to read 'Working with commercial version' section of
22832 : ! ALGLIB Reference Manual in order to find out how to use performance-
22833 : ! related features provided by commercial edition of ALGLIB.
22834 :
22835 : INPUT PARAMETERS:
22836 : Network - neural network;
22837 : XY - training set, see below for information on the
22838 : training set format;
22839 : NPoints - points count.
22840 :
22841 : RESULT:
22842 : CrossEntropy/(NPoints*LN(2)).
22843 : Zero if network solves regression task.
22844 :
22845 : DATASET FORMAT:
22846 :
22847 : This function uses two different dataset formats - one for regression
22848 : networks, another one for classification networks.
22849 :
22850 : For regression networks with NIn inputs and NOut outputs following dataset
22851 : format is used:
22852 : * dataset is given by NPoints*(NIn+NOut) matrix
22853 : * each row corresponds to one example
22854 : * first NIn columns are inputs, next NOut columns are outputs
22855 :
22856 : For classification networks with NIn inputs and NClasses classes following
22857 : dataset format is used:
22858 : * dataset is given by NPoints*(NIn+1) matrix
22859 : * each row corresponds to one example
22860 : * first NIn columns are inputs, last column stores class number (from 0 to
22861 : NClasses-1).
22862 :
22863 : -- ALGLIB --
22864 : Copyright 08.01.2009 by Bochkanov Sergey
22865 : *************************************************************************/
22866 0 : double mlpavgce(multilayerperceptron* network,
22867 : /* Real */ ae_matrix* xy,
22868 : ae_int_t npoints,
22869 : ae_state *_state)
22870 : {
22871 : double result;
22872 :
22873 :
22874 0 : ae_assert(xy->rows>=npoints, "MLPAvgCE: XY has less than NPoints rows", _state);
22875 0 : if( npoints>0 )
22876 : {
22877 0 : if( mlpissoftmax(network, _state) )
22878 : {
22879 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgCE: XY has less than NIn+1 columns", _state);
22880 : }
22881 : else
22882 : {
22883 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgCE: XY has less than NIn+NOut columns", _state);
22884 : }
22885 : }
22886 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22887 0 : result = network->err.avgce;
22888 0 : return result;
22889 : }
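
/*************************************************************************
Worked example (editorial): CrossEntropy/(NPoints*LN(2)) is the average
number of bits needed to encode the true class under the network's
posteriors. For two points whose correct-class posteriors are 0.5 and
0.25:
    AvgCE = (-log2(0.5) - log2(0.25))/2 = (1+2)/2 = 1.5 bits per element
*************************************************************************/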
22890 :
22891 :
22892 : /*************************************************************************
22893 : Average cross-entropy (in bits per element) on the test set given by
22894 : sparse matrix.
22895 :
22896 : ! COMMERCIAL EDITION OF ALGLIB:
22897 : !
22898 : ! Commercial Edition of ALGLIB includes following important improvements
22899 : ! of this function:
22900 : ! * high-performance native backend with same C# interface (C# version)
22901 : ! * multithreading support (C++ and C# versions)
22902 : !
22903 : ! We recommend you to read 'Working with commercial version' section of
22904 : ! ALGLIB Reference Manual in order to find out how to use performance-
22905 : ! related features provided by commercial edition of ALGLIB.
22906 :
22907 : INPUT PARAMETERS:
22908 : Network - neural network;
22909 : XY - training set, see below for information on the
22910 : training set format. This function checks correctness
22911 : of the dataset (no NANs/INFs, class numbers are
22912 : correct) and throws exception when incorrect dataset
22913 : is passed. Sparse matrix must use CRS format for
22914 : storage.
22915 : NPoints - points count, >=0.
22916 :
22917 : RESULT:
22918 : CrossEntropy/(NPoints*LN(2)).
22919 : Zero if network solves regression task.
22920 :
22921 : DATASET FORMAT:
22922 :
22923 : This function uses two different dataset formats - one for regression
22924 : networks, another one for classification networks.
22925 :
22926 : For regression networks with NIn inputs and NOut outputs following dataset
22927 : format is used:
22928 : * dataset is given by NPoints*(NIn+NOut) matrix
22929 : * each row corresponds to one example
22930 : * first NIn columns are inputs, next NOut columns are outputs
22931 :
22932 : For classification networks with NIn inputs and NClasses classes following
22933 : dataset format is used:
22934 : * dataset is given by NPoints*(NIn+1) matrix
22935 : * each row corresponds to one example
22936 : * first NIn columns are inputs, last column stores class number (from 0 to
22937 : NClasses-1).
22938 :
22939 : -- ALGLIB --
22940 : Copyright 9.08.2012 by Bochkanov Sergey
22941 : *************************************************************************/
22942 0 : double mlpavgcesparse(multilayerperceptron* network,
22943 : sparsematrix* xy,
22944 : ae_int_t npoints,
22945 : ae_state *_state)
22946 : {
22947 : double result;
22948 :
22949 :
22950 0 : ae_assert(sparseiscrs(xy, _state), "MLPAvgCESparse: sparse matrix XY is not in CRS format.", _state);
22951 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgCESparse: sparse matrix XY has less than NPoints rows", _state);
22952 0 : if( npoints>0 )
22953 : {
22954 0 : if( mlpissoftmax(network, _state) )
22955 : {
22956 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgCESparse: sparse matrix XY has less than NIn+1 columns", _state);
22957 : }
22958 : else
22959 : {
22960 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgCESparse: sparse matrix XY has less than NIn+NOut columns", _state);
22961 : }
22962 : }
22963 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
22964 0 : result = network->err.avgce;
22965 0 : return result;
22966 : }
22967 :
22968 :
22969 : /*************************************************************************
22970 : RMS error on the test set.
22971 :
22972 : ! COMMERCIAL EDITION OF ALGLIB:
22973 : !
22974 : ! Commercial Edition of ALGLIB includes following important improvements
22975 : ! of this function:
22976 : ! * high-performance native backend with same C# interface (C# version)
22977 : ! * multithreading support (C++ and C# versions)
22978 : !
22979 : ! We recommend you to read 'Working with commercial version' section of
22980 : ! ALGLIB Reference Manual in order to find out how to use performance-
22981 : ! related features provided by commercial edition of ALGLIB.
22982 :
22983 : INPUT PARAMETERS:
22984 : Network - neural network;
22985 : XY - training set, see below for information on the
22986 : training set format;
22987 : NPoints - points count.
22988 :
22989 : RESULT:
22990 : Root mean square error. Its meaning for regression task is obvious. As for
22991 : classification task, RMS error means error when estimating posterior
22992 : probabilities.
22993 :
22994 : DATASET FORMAT:
22995 :
22996 : This function uses two different dataset formats - one for regression
22997 : networks, another one for classification networks.
22998 :
22999 : For regression networks with NIn inputs and NOut outputs following dataset
23000 : format is used:
23001 : * dataset is given by NPoints*(NIn+NOut) matrix
23002 : * each row corresponds to one example
23003 : * first NIn columns are inputs, next NOut columns are outputs
23004 :
23005 : For classification networks with NIn inputs and NClasses classes following
23006 : dataset format is used:
23007 : * dataset is given by NPoints*(NIn+1) matrix
23008 : * each row corresponds to one example
23009 : * first NIn columns are inputs, last column stores class number (from 0 to
23010 : NClasses-1).
23011 :
23012 : -- ALGLIB --
23013 : Copyright 04.11.2007 by Bochkanov Sergey
23014 : *************************************************************************/
23015 0 : double mlprmserror(multilayerperceptron* network,
23016 : /* Real */ ae_matrix* xy,
23017 : ae_int_t npoints,
23018 : ae_state *_state)
23019 : {
23020 : double result;
23021 :
23022 :
23023 0 : ae_assert(xy->rows>=npoints, "MLPRMSError: XY has less than NPoints rows", _state);
23024 0 : if( npoints>0 )
23025 : {
23026 0 : if( mlpissoftmax(network, _state) )
23027 : {
23028 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPRMSError: XY has less than NIn+1 columns", _state);
23029 : }
23030 : else
23031 : {
23032 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRMSError: XY has less than NIn+NOut columns", _state);
23033 : }
23034 : }
23035 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23036 0 : result = network->err.rmserror;
23037 0 : return result;
23038 : }
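
/*************************************************************************
Worked relation (editorial): the dense error functions above all derive
from the same MLPAllErrorsX pass; in particular, for a network with NOut
outputs evaluated on NPoints points,
    MLPError = MLPRMSError^2 * NPoints * NOut / 2
which follows directly from the bodies of MLPError and MLPRMSError.
*************************************************************************/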
23039 :
23040 :
23041 : /*************************************************************************
23042 : RMS error on the test set given by sparse matrix.
23043 :
23044 : ! COMMERCIAL EDITION OF ALGLIB:
23045 : !
23046 : ! Commercial Edition of ALGLIB includes following important improvements
23047 : ! of this function:
23048 : ! * high-performance native backend with same C# interface (C# version)
23049 : ! * multithreading support (C++ and C# versions)
23050 : !
23051 : ! We recommend you to read 'Working with commercial version' section of
23052 : ! ALGLIB Reference Manual in order to find out how to use performance-
23053 : ! related features provided by commercial edition of ALGLIB.
23054 :
23055 : INPUT PARAMETERS:
23056 : Network - neural network;
23057 : XY - training set, see below for information on the
23058 : training set format. This function checks correctness
23059 : of the dataset (no NANs/INFs, class numbers are
23060 : correct) and throws exception when incorrect dataset
23061 : is passed. Sparse matrix must use CRS format for
23062 : storage.
23063 : NPoints - points count, >=0.
23064 :
23065 : RESULT:
23066 : Root mean square error. Its meaning for regression task is obvious. As for
23067 : classification task, RMS error means error when estimating posterior
23068 : probabilities.
23069 :
23070 : DATASET FORMAT:
23071 :
23072 : This function uses two different dataset formats - one for regression
23073 : networks, another one for classification networks.
23074 :
23075 : For regression networks with NIn inputs and NOut outputs following dataset
23076 : format is used:
23077 : * dataset is given by NPoints*(NIn+NOut) matrix
23078 : * each row corresponds to one example
23079 : * first NIn columns are inputs, next NOut columns are outputs
23080 :
23081 : For classification networks with NIn inputs and NClasses classes following
23082 : dataset format is used:
23083 : * dataset is given by NPoints*(NIn+1) matrix
23084 : * each row corresponds to one example
23085 : * first NIn columns are inputs, last column stores class number (from 0 to
23086 : NClasses-1).
23087 :
23088 : -- ALGLIB --
23089 : Copyright 09.08.2012 by Bochkanov Sergey
23090 : *************************************************************************/
23091 0 : double mlprmserrorsparse(multilayerperceptron* network,
23092 : sparsematrix* xy,
23093 : ae_int_t npoints,
23094 : ae_state *_state)
23095 : {
23096 : double result;
23097 :
23098 :
23099 0 : ae_assert(sparseiscrs(xy, _state), "MLPRMSErrorSparse: sparse matrix XY is not in CRS format.", _state);
23100 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPRMSErrorSparse: sparse matrix XY has less than NPoints rows", _state);
23101 0 : if( npoints>0 )
23102 : {
23103 0 : if( mlpissoftmax(network, _state) )
23104 : {
23105 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPRMSErrorSparse: sparse matrix XY has less than NIn+1 columns", _state);
23106 : }
23107 : else
23108 : {
23109 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPRMSErrorSparse: sparse matrix XY has less than NIn+NOut columns", _state);
23110 : }
23111 : }
23112 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23113 0 : result = network->err.rmserror;
23114 0 : return result;
23115 : }
23116 :
23117 :
23118 : /*************************************************************************
23119 : Average absolute error on the test set.
23120 :
23121 : ! COMMERCIAL EDITION OF ALGLIB:
23122 : !
23123 : ! Commercial Edition of ALGLIB includes following important improvements
23124 : ! of this function:
23125 : ! * high-performance native backend with same C# interface (C# version)
23126 : ! * multithreading support (C++ and C# versions)
23127 : !
23128 : ! We recommend you to read 'Working with commercial version' section of
23129 : ! ALGLIB Reference Manual in order to find out how to use performance-
23130 : ! related features provided by commercial edition of ALGLIB.
23131 :
23132 : INPUT PARAMETERS:
23133 : Network - neural network;
23134 : XY - training set, see below for information on the
23135 : training set format;
23136 : NPoints - points count.
23137 :
23138 : RESULT:
23139 : Its meaning for regression task is obvious. As for classification task, it
23140 : means average error when estimating posterior probabilities.
23141 :
23142 : DATASET FORMAT:
23143 :
23144 : This function uses two different dataset formats - one for regression
23145 : networks, another one for classification networks.
23146 :
23147 : For regression networks with NIn inputs and NOut outputs following dataset
23148 : format is used:
23149 : * dataset is given by NPoints*(NIn+NOut) matrix
23150 : * each row corresponds to one example
23151 : * first NIn columns are inputs, next NOut columns are outputs
23152 :
23153 : For classification networks with NIn inputs and NClasses classes following
23154 : dataset format is used:
23155 : * dataset is given by NPoints*(NIn+1) matrix
23156 : * each row corresponds to one example
23157 : * first NIn columns are inputs, last column stores class number (from 0 to
23158 : NClasses-1).
23159 :
23160 : -- ALGLIB --
23161 : Copyright 11.03.2008 by Bochkanov Sergey
23162 : *************************************************************************/
23163 0 : double mlpavgerror(multilayerperceptron* network,
23164 : /* Real */ ae_matrix* xy,
23165 : ae_int_t npoints,
23166 : ae_state *_state)
23167 : {
23168 : double result;
23169 :
23170 :
23171 0 : ae_assert(xy->rows>=npoints, "MLPAvgError: XY has less than NPoints rows", _state);
23172 0 : if( npoints>0 )
23173 : {
23174 0 : if( mlpissoftmax(network, _state) )
23175 : {
23176 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgError: XY has less than NIn+1 columns", _state);
23177 : }
23178 : else
23179 : {
23180 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgError: XY has less than NIn+NOut columns", _state);
23181 : }
23182 : }
23183 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23184 0 : result = network->err.avgerror;
23185 0 : return result;
23186 : }
23187 :
23188 :
23189 : /*************************************************************************
23190 : Average absolute error on the test set given by sparse matrix.
23191 :
23192 : ! COMMERCIAL EDITION OF ALGLIB:
23193 : !
23194 : ! Commercial Edition of ALGLIB includes following important improvements
23195 : ! of this function:
23196 : ! * high-performance native backend with same C# interface (C# version)
23197 : ! * multithreading support (C++ and C# versions)
23198 : !
23199 : ! We recommend you to read 'Working with commercial version' section of
23200 : ! ALGLIB Reference Manual in order to find out how to use performance-
23201 : ! related features provided by commercial edition of ALGLIB.
23202 :
23203 : INPUT PARAMETERS:
23204 : Network - neural network;
23205 : XY - training set, see below for information on the
23206 : training set format. This function checks correctness
23207 : of the dataset (no NANs/INFs, class numbers are
23208 : correct) and throws exception when incorrect dataset
23209 : is passed. Sparse matrix must use CRS format for
23210 : storage.
23211 : NPoints - points count, >=0.
23212 :
23213 : RESULT:
23214 : Its meaning for regression task is obvious. As for classification task, it
23215 : means average error when estimating posterior probabilities.
23216 :
23217 : DATASET FORMAT:
23218 :
23219 : This function uses two different dataset formats - one for regression
23220 : networks, another one for classification networks.
23221 :
23222 : For regression networks with NIn inputs and NOut outputs following dataset
23223 : format is used:
23224 : * dataset is given by NPoints*(NIn+NOut) matrix
23225 : * each row corresponds to one example
23226 : * first NIn columns are inputs, next NOut columns are outputs
23227 :
23228 : For classification networks with NIn inputs and NClasses classes following
23229 : dataset format is used:
23230 : * dataset is given by NPoints*(NIn+1) matrix
23231 : * each row corresponds to one example
23232 : * first NIn columns are inputs, last column stores class number (from 0 to
23233 : NClasses-1).
23234 :
23235 : -- ALGLIB --
23236 : Copyright 09.08.2012 by Bochkanov Sergey
23237 : *************************************************************************/
23238 0 : double mlpavgerrorsparse(multilayerperceptron* network,
23239 : sparsematrix* xy,
23240 : ae_int_t npoints,
23241 : ae_state *_state)
23242 : {
23243 : double result;
23244 :
23245 :
23246 0 : ae_assert(sparseiscrs(xy, _state), "MLPAvgErrorSparse: XY is not in CRS format.", _state);
23247 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgErrorSparse: XY has less than NPoints rows", _state);
23248 0 : if( npoints>0 )
23249 : {
23250 0 : if( mlpissoftmax(network, _state) )
23251 : {
23252 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgErrorSparse: XY has less than NIn+1 columns", _state);
23253 : }
23254 : else
23255 : {
23256 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgErrorSparse: XY has less than NIn+NOut columns", _state);
23257 : }
23258 : }
23259 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23260 0 : result = network->err.avgerror;
23261 0 : return result;
23262 : }
23263 :
23264 :
23265 : /*************************************************************************
23266 : Average relative error on the test set.
23267 :
23268 : ! COMMERCIAL EDITION OF ALGLIB:
23269 : !
23270 : ! Commercial Edition of ALGLIB includes following important improvements
23271 : ! of this function:
23272 : ! * high-performance native backend with same C# interface (C# version)
23273 : ! * multithreading support (C++ and C# versions)
23274 : !
23275 : ! We recommend you to read 'Working with commercial version' section of
23276 : ! ALGLIB Reference Manual in order to find out how to use performance-
23277 : ! related features provided by commercial edition of ALGLIB.
23278 :
23279 : INPUT PARAMETERS:
23280 : Network - neural network;
23281 : XY - training set, see below for information on the
23282 : training set format;
23283 : NPoints - points count.
23284 :
23285 : RESULT:
23286 : Its meaning for regression task is obvious. As for classification task, it
23287 : means average relative error when estimating posterior probability of
23288 : belonging to the correct class.
23289 :
23290 : DATASET FORMAT:
23291 :
23292 : This function uses two different dataset formats - one for regression
23293 : networks, another one for classification networks.
23294 :
23295 : For regression networks with NIn inputs and NOut outputs following dataset
23296 : format is used:
23297 : * dataset is given by NPoints*(NIn+NOut) matrix
23298 : * each row corresponds to one example
23299 : * first NIn columns are inputs, next NOut columns are outputs
23300 :
23301 : For classification networks with NIn inputs and NClasses classes following
23302 : dataset format is used:
23303 : * dataset is given by NPoints*(NIn+1) matrix
23304 : * each row corresponds to one example
23305 : * first NIn columns are inputs, last column stores class number (from 0 to
23306 : NClasses-1).
23307 :
23308 : -- ALGLIB --
23309 : Copyright 11.03.2008 by Bochkanov Sergey
23310 : *************************************************************************/
23311 0 : double mlpavgrelerror(multilayerperceptron* network,
23312 : /* Real */ ae_matrix* xy,
23313 : ae_int_t npoints,
23314 : ae_state *_state)
23315 : {
23316 : double result;
23317 :
23318 :
23319 0 : ae_assert(xy->rows>=npoints, "MLPAvgRelError: XY has less than NPoints rows", _state);
23320 0 : if( npoints>0 )
23321 : {
23322 0 : if( mlpissoftmax(network, _state) )
23323 : {
23324 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAvgRelError: XY has less than NIn+1 columns", _state);
23325 : }
23326 : else
23327 : {
23328 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgRelError: XY has less than NIn+NOut columns", _state);
23329 : }
23330 : }
23331 0 : mlpallerrorsx(network, xy, &network->dummysxy, npoints, 0, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23332 0 : result = network->err.avgrelerror;
23333 0 : return result;
23334 : }
23335 :
23336 :
23337 : /*************************************************************************
23338 : Average relative error on the test set given by sparse matrix.
23339 :
23340 : ! COMMERCIAL EDITION OF ALGLIB:
23341 : !
23342 : ! The Commercial Edition of ALGLIB includes the following important
23343 : ! improvements of this function:
23344 : ! * high-performance native backend with the same C# interface (C# version)
23345 : ! * multithreading support (C++ and C# versions)
23346 : !
23347 : ! We recommend that you read the 'Working with commercial version' section
23348 : ! of the ALGLIB Reference Manual to find out how to use the performance-
23349 : ! related features provided by the commercial edition of ALGLIB.
23350 :
23351 : INPUT PARAMETERS:
23352 : Network - neural network;
23353 : XY - training set, see below for information on the
23354 : training set format. This function checks correctness
23355 : of the dataset (no NANs/INFs, class numbers are
23356 : correct) and throws an exception when an incorrect
23357 : dataset is passed. The sparse matrix must use the CRS
23358 : format for storage.
23359 : NPoints - points count, >=0.
23360 :
23361 : RESULT:
23362 : For regression tasks its meaning is obvious. For classification tasks it
23363 : is the average relative error made when estimating the posterior
23364 : probability of belonging to the correct class.
23365 :
23366 : DATASET FORMAT:
23367 :
23368 : This function uses two different dataset formats - one for regression
23369 : networks, another one for classification networks.
23370 :
23371 : For regression networks with NIn inputs and NOut outputs the following
23372 : dataset format is used:
23373 : * dataset is given by NPoints*(NIn+NOut) matrix
23374 : * each row corresponds to one example
23375 : * first NIn columns are inputs, next NOut columns are outputs
23376 :
23377 : For classification networks with NIn inputs and NClasses classes the
23378 : following dataset format is used:
23379 : * dataset is given by NPoints*(NIn+1) matrix
23380 : * each row corresponds to one example
23381 : * first NIn columns are inputs, last column stores class number (from 0 to
23382 : NClasses-1).
23383 :
23384 : -- ALGLIB --
23385 : Copyright 09.08.2012 by Bochkanov Sergey
23386 : *************************************************************************/
23387 0 : double mlpavgrelerrorsparse(multilayerperceptron* network,
23388 : sparsematrix* xy,
23389 : ae_int_t npoints,
23390 : ae_state *_state)
23391 : {
23392 : double result;
23393 :
23394 :
23395 0 : ae_assert(sparseiscrs(xy, _state), "MLPAvgRelErrorSparse: XY is not in CRS format.", _state);
23396 0 : ae_assert(sparsegetnrows(xy, _state)>=npoints, "MLPAvgRelErrorSparse: XY has less than NPoints rows", _state);
23397 0 : if( npoints>0 )
23398 : {
23399 0 : if( mlpissoftmax(network, _state) )
23400 : {
23401 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAvgRelErrorSparse: XY has less than NIn+1 columns", _state);
23402 : }
23403 : else
23404 : {
23405 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAvgRelErrorSparse: XY has less than NIn+NOut columns", _state);
23406 : }
23407 : }
23408 0 : mlpallerrorsx(network, &network->dummydxy, xy, npoints, 1, &network->dummyidx, 0, npoints, 0, &network->buf, &network->err, _state);
23409 0 : result = network->err.avgrelerror;
23410 0 : return result;
23411 : }
23412 :
23413 :
23414 : /*************************************************************************
23415 : Gradient calculation
23416 :
23417 : INPUT PARAMETERS:
23418 : Network - network initialized with one of the network creation funcs
23419 : X - input vector, length of array must be at least NIn
23420 : DesiredY- desired outputs, length of array must be at least NOut
23421 : Grad - possibly preallocated array. If size of array is smaller
23422 : than WCount, it will be reallocated. It is recommended to
23423 : reuse previously allocated array to reduce allocation
23424 : overhead.
23425 :
23426 : OUTPUT PARAMETERS:
23427 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
23428 : Grad - gradient of E with respect to weights of network, array[WCount]
23429 :
23430 : -- ALGLIB --
23431 : Copyright 04.11.2007 by Bochkanov Sergey
23432 : *************************************************************************/
23433 0 : void mlpgrad(multilayerperceptron* network,
23434 : /* Real */ ae_vector* x,
23435 : /* Real */ ae_vector* desiredy,
23436 : double* e,
23437 : /* Real */ ae_vector* grad,
23438 : ae_state *_state)
23439 : {
23440 : ae_int_t i;
23441 : ae_int_t nout;
23442 : ae_int_t ntotal;
23443 :
23444 0 : *e = 0;
23445 :
23446 :
23447 : /*
23448 : * Alloc
23449 : */
23450 0 : rvectorsetlengthatleast(grad, network->structinfo.ptr.p_int[4], _state);
23451 :
23452 : /*
23453 : * Prepare dError/dOut, internal structures
23454 : */
23455 0 : mlpprocess(network, x, &network->y, _state);
23456 0 : nout = network->structinfo.ptr.p_int[2];
23457 0 : ntotal = network->structinfo.ptr.p_int[3];
23458 0 : *e = (double)(0);
23459 0 : for(i=0; i<=ntotal-1; i++)
23460 : {
23461 0 : network->derror.ptr.p_double[i] = (double)(0);
23462 : }
23463 0 : for(i=0; i<=nout-1; i++)
23464 : {
23465 0 : network->derror.ptr.p_double[ntotal-nout+i] = network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
23466 0 : *e = *e+ae_sqr(network->y.ptr.p_double[i]-desiredy->ptr.p_double[i], _state)/2;
23467 : }
23468 :
23469 : /*
23470 : * gradient
23471 : */
23472 0 : mlpbase_mlpinternalcalculategradient(network, &network->neurons, &network->weights, &network->derror, grad, ae_false, _state);
23473 0 : }
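/*************************************************************************
Editor's note: a minimal usage sketch for MLPGrad, added for illustration
(not part of the original source; the sample values are made up). It
follows the ALGLIB C-core conventions used throughout this file: a frame
is made, structures are initialized with their _init() functions, and the
network is a small regression net created with MLPCreate1.
*************************************************************************/
static void example_mlpgrad(ae_state *_state)
{
    ae_frame _frame_block;
    multilayerperceptron net;
    ae_vector x, desiredy, grad;
    double e;

    ae_frame_make(_state, &_frame_block);
    memset(&net, 0, sizeof(net));
    _multilayerperceptron_init(&net, _state, ae_true);
    mlpcreate1(2, 5, 1, &net, _state);  /* 2 inputs -> 5 hidden -> 1 output */

    /* one training sample: inputs (1,2), desired output 0.5 */
    ae_vector_init(&x, 2, DT_REAL, _state, ae_true);
    ae_vector_init(&desiredy, 1, DT_REAL, _state, ae_true);
    ae_vector_init(&grad, 0, DT_REAL, _state, ae_true);
    x.ptr.p_double[0] = 1.0;
    x.ptr.p_double[1] = 2.0;
    desiredy.ptr.p_double[0] = 0.5;

    /* E = SUM(sqr(y[i]-desiredy[i])/2,i); Grad is resized to WCount */
    mlpgrad(&net, &x, &desiredy, &e, &grad, _state);
    ae_frame_leave(_state);
}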
23474 :
23475 :
23476 : /*************************************************************************
23477 : Gradient calculation (natural error function is used)
23478 :
23479 : INPUT PARAMETERS:
23480 : Network - network initialized with one of the network creation funcs
23481 : X - input vector, length of array must be at least NIn
23482 : DesiredY- desired outputs, length of array must be at least NOut
23483 : Grad - possibly preallocated array. If size of array is smaller
23484 : than WCount, it will be reallocated. It is recommended to
23485 : reuse previously allocated array to reduce allocation
23486 : overhead.
23487 :
23488 : OUTPUT PARAMETERS:
23489 : E - error function, sum-of-squares for regression networks,
23490 : cross-entropy for classification networks.
23491 : Grad - gradient of E with respect to weights of network, array[WCount]
23492 :
23493 : -- ALGLIB --
23494 : Copyright 04.11.2007 by Bochkanov Sergey
23495 : *************************************************************************/
23496 0 : void mlpgradn(multilayerperceptron* network,
23497 : /* Real */ ae_vector* x,
23498 : /* Real */ ae_vector* desiredy,
23499 : double* e,
23500 : /* Real */ ae_vector* grad,
23501 : ae_state *_state)
23502 : {
23503 : double s;
23504 : ae_int_t i;
23505 : ae_int_t nout;
23506 : ae_int_t ntotal;
23507 :
23508 0 : *e = 0;
23509 :
23510 :
23511 : /*
23512 : * Alloc
23513 : */
23514 0 : rvectorsetlengthatleast(grad, network->structinfo.ptr.p_int[4], _state);
23515 :
23516 : /*
23517 : * Prepare dError/dOut, internal structures
23518 : */
23519 0 : mlpprocess(network, x, &network->y, _state);
23520 0 : nout = network->structinfo.ptr.p_int[2];
23521 0 : ntotal = network->structinfo.ptr.p_int[3];
23522 0 : for(i=0; i<=ntotal-1; i++)
23523 : {
23524 0 : network->derror.ptr.p_double[i] = (double)(0);
23525 : }
23526 0 : *e = (double)(0);
23527 0 : if( network->structinfo.ptr.p_int[6]==0 )
23528 : {
23529 :
23530 : /*
23531 : * Regression network, least squares
23532 : */
23533 0 : for(i=0; i<=nout-1; i++)
23534 : {
23535 0 : network->derror.ptr.p_double[ntotal-nout+i] = network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
23536 0 : *e = *e+ae_sqr(network->y.ptr.p_double[i]-desiredy->ptr.p_double[i], _state)/2;
23537 : }
23538 : }
23539 : else
23540 : {
23541 :
23542 : /*
23543 : * Classification network, cross-entropy
23544 : */
23545 0 : s = (double)(0);
23546 0 : for(i=0; i<=nout-1; i++)
23547 : {
23548 0 : s = s+desiredy->ptr.p_double[i];
23549 : }
23550 0 : for(i=0; i<=nout-1; i++)
23551 : {
23552 0 : network->derror.ptr.p_double[ntotal-nout+i] = s*network->y.ptr.p_double[i]-desiredy->ptr.p_double[i];
23553 0 : *e = *e+mlpbase_safecrossentropy(desiredy->ptr.p_double[i], network->y.ptr.p_double[i], _state);
23554 : }
23555 : }
23556 :
23557 : /*
23558 : * gradient
23559 : */
23560 0 : mlpbase_mlpinternalcalculategradient(network, &network->neurons, &network->weights, &network->derror, grad, ae_true, _state);
23561 0 : }
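/*************************************************************************
Editor's note (added): why the classification branch above sets
dError/dOut[i] = S*y[i]-desiredy[i]. For a softmax output layer with
cross-entropy error E = -SUM(d[i]*ln(y[i]),i) and pre-softmax activations
z[i], the softmax Jacobian is dy[j]/dz[i] = y[j]*(delta(i,j)-y[i]), so

    dE/dz[i] = -SUM(d[j]*(delta(i,j)-y[i]),j) = S*y[i]-d[i],  S = SUM(d[j],j)

When the targets form a probability distribution (S=1) this reduces to
the familiar y[i]-d[i].
*************************************************************************/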
23562 :
23563 :
23564 : /*************************************************************************
23565 : Batch gradient calculation for a set of inputs/outputs
23566 :
23567 : ! COMMERCIAL EDITION OF ALGLIB:
23568 : !
23569 : ! The Commercial Edition of ALGLIB includes the following important
23570 : ! improvements of this function:
23571 : ! * high-performance native backend with the same C# interface (C# version)
23572 : ! * multithreading support (C++ and C# versions)
23573 : !
23574 : ! We recommend that you read the 'Working with commercial version' section
23575 : ! of the ALGLIB Reference Manual to find out how to use the performance-
23576 : ! related features provided by the commercial edition of ALGLIB.
23577 :
23578 : INPUT PARAMETERS:
23579 : Network - network initialized with one of the network creation funcs
23580 : XY - original dataset in dense format; one sample = one row:
23581 : * first NIn columns contain inputs,
23582 : * for regression problem, next NOut columns store
23583 : desired outputs.
23584 : * for classification problem, next column (just one!)
23585 : stores class number.
23586 : SSize - number of elements in XY
23587 : Grad - possibly preallocated array. If size of array is smaller
23588 : than WCount, it will be reallocated. It is recommended to
23589 : reuse previously allocated array to reduce allocation
23590 : overhead.
23591 :
23592 : OUTPUT PARAMETERS:
23593 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
23594 : Grad - gradient of E with respect to weights of network, array[WCount]
23595 :
23596 : -- ALGLIB --
23597 : Copyright 04.11.2007 by Bochkanov Sergey
23598 : *************************************************************************/
23599 0 : void mlpgradbatch(multilayerperceptron* network,
23600 : /* Real */ ae_matrix* xy,
23601 : ae_int_t ssize,
23602 : double* e,
23603 : /* Real */ ae_vector* grad,
23604 : ae_state *_state)
23605 : {
23606 : ae_frame _frame_block;
23607 : ae_int_t i;
23608 : ae_int_t nin;
23609 : ae_int_t nout;
23610 : ae_int_t wcount;
23611 : ae_int_t subset0;
23612 : ae_int_t subset1;
23613 : ae_int_t subsettype;
23614 : smlpgrad *sgrad;
23615 : ae_smart_ptr _sgrad;
23616 :
23617 0 : ae_frame_make(_state, &_frame_block);
23618 0 : memset(&_sgrad, 0, sizeof(_sgrad));
23619 0 : *e = 0;
23620 0 : ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);
23621 :
23622 0 : ae_assert(ssize>=0, "MLPGradBatch: SSize<0", _state);
23623 0 : subset0 = 0;
23624 0 : subset1 = ssize;
23625 0 : subsettype = 0;
23626 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
23627 0 : rvectorsetlengthatleast(grad, wcount, _state);
23628 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23629 0 : while(sgrad!=NULL)
23630 : {
23631 0 : sgrad->f = 0.0;
23632 0 : for(i=0; i<=wcount-1; i++)
23633 : {
23634 0 : sgrad->g.ptr.p_double[i] = 0.0;
23635 : }
23636 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23637 : }
23638 0 : mlpgradbatchx(network, xy, &network->dummysxy, ssize, 0, &network->dummyidx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
23639 0 : *e = 0.0;
23640 0 : for(i=0; i<=wcount-1; i++)
23641 : {
23642 0 : grad->ptr.p_double[i] = 0.0;
23643 : }
23644 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23645 0 : while(sgrad!=NULL)
23646 : {
23647 0 : *e = *e+sgrad->f;
23648 0 : for(i=0; i<=wcount-1; i++)
23649 : {
23650 0 : grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
23651 : }
23652 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23653 : }
23654 0 : ae_frame_leave(_state);
23655 0 : }
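/*************************************************************************
Editor's note: a minimal usage sketch for MLPGradBatch, added for
illustration (the dataset values are made up). The dense dataset layout
follows the comment above: one row per sample, NIn input columns followed
by NOut output columns. Net is assumed to be a regression network with
NIn=2, NOut=1.
*************************************************************************/
static void example_mlpgradbatch(multilayerperceptron* net, ae_state *_state)
{
    ae_frame _frame_block;
    ae_matrix xy;
    ae_vector grad;
    double e;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    ae_matrix_init(&xy, 4, 3, DT_REAL, _state, ae_true);
    for(i=0; i<=3; i++)
    {
        xy.ptr.pp_double[i][0] = (double)i;         /* input 0  */
        xy.ptr.pp_double[i][1] = (double)(i*i);     /* input 1  */
        xy.ptr.pp_double[i][2] = 0.5*(double)i;     /* output 0 */
    }
    ae_vector_init(&grad, 0, DT_REAL, _state, ae_true);
    mlpgradbatch(net, &xy, 4, &e, &grad, _state);   /* SSize = 4 rows */
    ae_frame_leave(_state);
}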
23656 :
23657 :
23658 : /*************************************************************************
23659 : Batch gradient calculation for a set of inputs/outputs given by sparse
23660 : matrices
23661 :
23662 : ! COMMERCIAL EDITION OF ALGLIB:
23663 : !
23664 : ! The Commercial Edition of ALGLIB includes the following important
23665 : ! improvements of this function:
23666 : ! * high-performance native backend with the same C# interface (C# version)
23667 : ! * multithreading support (C++ and C# versions)
23668 : !
23669 : ! We recommend that you read the 'Working with commercial version' section
23670 : ! of the ALGLIB Reference Manual to find out how to use the performance-
23671 : ! related features provided by the commercial edition of ALGLIB.
23672 :
23673 : INPUT PARAMETERS:
23674 : Network - network initialized with one of the network creation funcs
23675 : XY - original dataset in sparse format; one sample = one row:
23676 : * MATRIX MUST BE STORED IN CRS FORMAT
23677 : * first NIn columns contain inputs.
23678 : * for regression problem, next NOut columns store
23679 : desired outputs.
23680 : * for classification problem, next column (just one!)
23681 : stores class number.
23682 : SSize - number of elements in XY
23683 : Grad - possibly preallocated array. If size of array is smaller
23684 : than WCount, it will be reallocated. It is recommended to
23685 : reuse previously allocated array to reduce allocation
23686 : overhead.
23687 :
23688 : OUTPUT PARAMETERS:
23689 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
23690 : Grad - gradient of E with respect to weights of network, array[WCount]
23691 :
23692 : -- ALGLIB --
23693 : Copyright 26.07.2012 by Bochkanov Sergey
23694 : *************************************************************************/
23695 0 : void mlpgradbatchsparse(multilayerperceptron* network,
23696 : sparsematrix* xy,
23697 : ae_int_t ssize,
23698 : double* e,
23699 : /* Real */ ae_vector* grad,
23700 : ae_state *_state)
23701 : {
23702 : ae_frame _frame_block;
23703 : ae_int_t i;
23704 : ae_int_t nin;
23705 : ae_int_t nout;
23706 : ae_int_t wcount;
23707 : ae_int_t subset0;
23708 : ae_int_t subset1;
23709 : ae_int_t subsettype;
23710 : smlpgrad *sgrad;
23711 : ae_smart_ptr _sgrad;
23712 :
23713 0 : ae_frame_make(_state, &_frame_block);
23714 0 : memset(&_sgrad, 0, sizeof(_sgrad));
23715 0 : *e = 0;
23716 0 : ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);
23717 :
23718 0 : ae_assert(ssize>=0, "MLPGradBatchSparse: SSize<0", _state);
23719 0 : ae_assert(sparseiscrs(xy, _state), "MLPGradBatchSparse: sparse matrix XY must be in CRS format.", _state);
23720 0 : subset0 = 0;
23721 0 : subset1 = ssize;
23722 0 : subsettype = 0;
23723 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
23724 0 : rvectorsetlengthatleast(grad, wcount, _state);
23725 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23726 0 : while(sgrad!=NULL)
23727 : {
23728 0 : sgrad->f = 0.0;
23729 0 : for(i=0; i<=wcount-1; i++)
23730 : {
23731 0 : sgrad->g.ptr.p_double[i] = 0.0;
23732 : }
23733 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23734 : }
23735 0 : mlpgradbatchx(network, &network->dummydxy, xy, ssize, 1, &network->dummyidx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
23736 0 : *e = 0.0;
23737 0 : for(i=0; i<=wcount-1; i++)
23738 : {
23739 0 : grad->ptr.p_double[i] = 0.0;
23740 : }
23741 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23742 0 : while(sgrad!=NULL)
23743 : {
23744 0 : *e = *e+sgrad->f;
23745 0 : for(i=0; i<=wcount-1; i++)
23746 : {
23747 0 : grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
23748 : }
23749 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23750 : }
23751 0 : ae_frame_leave(_state);
23752 0 : }
23753 :
23754 :
23755 : /*************************************************************************
23756 : Batch gradient calculation for a subset of dataset
23757 :
23758 : ! COMMERCIAL EDITION OF ALGLIB:
23759 : !
23760 : ! The Commercial Edition of ALGLIB includes the following important
23761 : ! improvements of this function:
23762 : ! * high-performance native backend with the same C# interface (C# version)
23763 : ! * multithreading support (C++ and C# versions)
23764 : !
23765 : ! We recommend that you read the 'Working with commercial version' section
23766 : ! of the ALGLIB Reference Manual to find out how to use the performance-
23767 : ! related features provided by the commercial edition of ALGLIB.
23768 :
23769 : INPUT PARAMETERS:
23770 : Network - network initialized with one of the network creation funcs
23771 : XY - original dataset in dense format; one sample = one row:
23772 : * first NIn columns contain inputs,
23773 : * for regression problem, next NOut columns store
23774 : desired outputs.
23775 : * for classification problem, next column (just one!)
23776 : stores class number.
23777 : SetSize - real size of XY, SetSize>=0;
23778 : Idx - subset of SubsetSize elements, array[SubsetSize]:
23779 : * Idx[I] stores row index in the original dataset which is
23780 : given by XY. Gradient is calculated with respect to rows
23781 : whose indexes are stored in Idx[].
23782 : * Idx[] must store correct indexes; this function throws
23783 : an exception if an incorrect index (less than 0 or
23784 : larger than Rows(XY)-1) is given
23785 : * Idx[] may store indexes in any order and even with
23786 : repetitions.
23787 : SubsetSize- number of elements in Idx[] array:
23788 : * positive value means that subset given by Idx[] is processed
23789 : * zero value results in zero gradient
23790 : * negative value means that full dataset is processed
23791 : Grad - possibly preallocated array. If size of array is smaller
23792 : than WCount, it will be reallocated. It is recommended to
23793 : reuse previously allocated array to reduce allocation
23794 : overhead.
23795 :
23796 : OUTPUT PARAMETERS:
23797 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
23798 : Grad - gradient of E with respect to weights of network,
23799 : array[WCount]
23800 :
23801 : -- ALGLIB --
23802 : Copyright 26.07.2012 by Bochkanov Sergey
23803 : *************************************************************************/
23804 0 : void mlpgradbatchsubset(multilayerperceptron* network,
23805 : /* Real */ ae_matrix* xy,
23806 : ae_int_t setsize,
23807 : /* Integer */ ae_vector* idx,
23808 : ae_int_t subsetsize,
23809 : double* e,
23810 : /* Real */ ae_vector* grad,
23811 : ae_state *_state)
23812 : {
23813 : ae_frame _frame_block;
23814 : ae_int_t i;
23815 : ae_int_t nin;
23816 : ae_int_t nout;
23817 : ae_int_t wcount;
23818 : ae_int_t npoints;
23819 : ae_int_t subset0;
23820 : ae_int_t subset1;
23821 : ae_int_t subsettype;
23822 : smlpgrad *sgrad;
23823 : ae_smart_ptr _sgrad;
23824 :
23825 0 : ae_frame_make(_state, &_frame_block);
23826 0 : memset(&_sgrad, 0, sizeof(_sgrad));
23827 0 : *e = 0;
23828 0 : ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);
23829 :
23830 0 : ae_assert(setsize>=0, "MLPGradBatchSubset: SetSize<0", _state);
23831 0 : ae_assert(subsetsize<=idx->cnt, "MLPGradBatchSubset: SubsetSize>Length(Idx)", _state);
23832 0 : npoints = setsize;
23833 0 : if( subsetsize<0 )
23834 : {
23835 0 : subset0 = 0;
23836 0 : subset1 = setsize;
23837 0 : subsettype = 0;
23838 : }
23839 : else
23840 : {
23841 0 : subset0 = 0;
23842 0 : subset1 = subsetsize;
23843 0 : subsettype = 1;
23844 0 : for(i=0; i<=subsetsize-1; i++)
23845 : {
23846 0 : ae_assert(idx->ptr.p_int[i]>=0, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]<0)", _state);
23847 0 : ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
23848 : }
23849 : }
23850 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
23851 0 : rvectorsetlengthatleast(grad, wcount, _state);
23852 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23853 0 : while(sgrad!=NULL)
23854 : {
23855 0 : sgrad->f = 0.0;
23856 0 : for(i=0; i<=wcount-1; i++)
23857 : {
23858 0 : sgrad->g.ptr.p_double[i] = 0.0;
23859 : }
23860 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23861 : }
23862 0 : mlpgradbatchx(network, xy, &network->dummysxy, setsize, 0, idx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
23863 0 : *e = 0.0;
23864 0 : for(i=0; i<=wcount-1; i++)
23865 : {
23866 0 : grad->ptr.p_double[i] = 0.0;
23867 : }
23868 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23869 0 : while(sgrad!=NULL)
23870 : {
23871 0 : *e = *e+sgrad->f;
23872 0 : for(i=0; i<=wcount-1; i++)
23873 : {
23874 0 : grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
23875 : }
23876 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23877 : }
23878 0 : ae_frame_leave(_state);
23879 0 : }
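/*************************************************************************
Editor's note: a sketch (added for illustration) of using the Idx[] and
SubsetSize arguments of MLPGradBatchSubset to compute a minibatch
gradient. As documented above, indexes may repeat and may appear in any
order. BSize is an arbitrary illustrative batch size; SetSize>0 is
assumed.
*************************************************************************/
static void example_minibatch_grad(multilayerperceptron* net,
    /* Real */ ae_matrix* xy,
    ae_int_t setsize,
    double* e,
    /* Real */ ae_vector* grad,
    ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector idx;
    ae_int_t i;
    ae_int_t bsize = 16;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&idx, bsize, DT_INT, _state, ae_true);
    for(i=0; i<=bsize-1; i++)
    {
        idx.ptr.p_int[i] = i%setsize;   /* any row index in [0,SetSize) */
    }
    mlpgradbatchsubset(net, xy, setsize, &idx, bsize, e, grad, _state);
    ae_frame_leave(_state);
}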
23880 :
23881 :
23882 : /*************************************************************************
23883 : Batch gradient calculation for a set of inputs/outputs on a subset of
23884 : the dataset given by a set of indexes.
23885 :
23886 : ! COMMERCIAL EDITION OF ALGLIB:
23887 : !
23888 : ! The Commercial Edition of ALGLIB includes the following important
23889 : ! improvements of this function:
23890 : ! * high-performance native backend with the same C# interface (C# version)
23891 : ! * multithreading support (C++ and C# versions)
23892 : !
23893 : ! We recommend that you read the 'Working with commercial version' section
23894 : ! of the ALGLIB Reference Manual to find out how to use the performance-
23895 : ! related features provided by the commercial edition of ALGLIB.
23896 :
23897 : INPUT PARAMETERS:
23898 : Network - network initialized with one of the network creation funcs
23899 : XY - original dataset in sparse format; one sample = one row:
23900 : * MATRIX MUST BE STORED IN CRS FORMAT
23901 : * first NIn columns contain inputs,
23902 : * for regression problem, next NOut columns store
23903 : desired outputs.
23904 : * for classification problem, next column (just one!)
23905 : stores class number.
23906 : SetSize - real size of XY, SetSize>=0;
23907 : Idx - subset of SubsetSize elements, array[SubsetSize]:
23908 : * Idx[I] stores row index in the original dataset which is
23909 : given by XY. Gradient is calculated with respect to rows
23910 : whose indexes are stored in Idx[].
23911 : * Idx[] must store correct indexes; this function throws
23912 : an exception if an incorrect index (less than 0 or
23913 : larger than Rows(XY)-1) is given
23914 : * Idx[] may store indexes in any order and even with
23915 : repetitions.
23916 : SubsetSize- number of elements in Idx[] array:
23917 : * positive value means that subset given by Idx[] is processed
23918 : * zero value results in zero gradient
23919 : * negative value means that full dataset is processed
23920 : Grad - possibly preallocated array. If size of array is smaller
23921 : than WCount, it will be reallocated. It is recommended to
23922 : reuse previously allocated array to reduce allocation
23923 : overhead.
23924 :
23925 : OUTPUT PARAMETERS:
23926 : E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
23927 : Grad - gradient of E with respect to weights of network,
23928 : array[WCount]
23929 :
23930 : NOTE: when SubsetSize<0, the full dataset is processed, exactly as if
23931 : MLPGradBatchSparse had been called.
23932 :
23933 : -- ALGLIB --
23934 : Copyright 26.07.2012 by Bochkanov Sergey
23935 : *************************************************************************/
23936 0 : void mlpgradbatchsparsesubset(multilayerperceptron* network,
23937 : sparsematrix* xy,
23938 : ae_int_t setsize,
23939 : /* Integer */ ae_vector* idx,
23940 : ae_int_t subsetsize,
23941 : double* e,
23942 : /* Real */ ae_vector* grad,
23943 : ae_state *_state)
23944 : {
23945 : ae_frame _frame_block;
23946 : ae_int_t i;
23947 : ae_int_t nin;
23948 : ae_int_t nout;
23949 : ae_int_t wcount;
23950 : ae_int_t npoints;
23951 : ae_int_t subset0;
23952 : ae_int_t subset1;
23953 : ae_int_t subsettype;
23954 : smlpgrad *sgrad;
23955 : ae_smart_ptr _sgrad;
23956 :
23957 0 : ae_frame_make(_state, &_frame_block);
23958 0 : memset(&_sgrad, 0, sizeof(_sgrad));
23959 0 : *e = 0;
23960 0 : ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);
23961 :
23962 0 : ae_assert(setsize>=0, "MLPGradBatchSparseSubset: SetSize<0", _state);
23963 0 : ae_assert(subsetsize<=idx->cnt, "MLPGradBatchSparseSubset: SubsetSize>Length(Idx)", _state);
23964 0 : ae_assert(sparseiscrs(xy, _state), "MLPGradBatchSparseSubset: sparse matrix XY must be in CRS format.", _state);
23965 0 : npoints = setsize;
23966 0 : if( subsetsize<0 )
23967 : {
23968 0 : subset0 = 0;
23969 0 : subset1 = setsize;
23970 0 : subsettype = 0;
23971 : }
23972 : else
23973 : {
23974 0 : subset0 = 0;
23975 0 : subset1 = subsetsize;
23976 0 : subsettype = 1;
23977 0 : for(i=0; i<=subsetsize-1; i++)
23978 : {
23979 0 : ae_assert(idx->ptr.p_int[i]>=0, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]<0)", _state);
23980 0 : ae_assert(idx->ptr.p_int[i]<=npoints-1, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)", _state);
23981 : }
23982 : }
23983 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
23984 0 : rvectorsetlengthatleast(grad, wcount, _state);
23985 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
23986 0 : while(sgrad!=NULL)
23987 : {
23988 0 : sgrad->f = 0.0;
23989 0 : for(i=0; i<=wcount-1; i++)
23990 : {
23991 0 : sgrad->g.ptr.p_double[i] = 0.0;
23992 : }
23993 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
23994 : }
23995 0 : mlpgradbatchx(network, &network->dummydxy, xy, setsize, 1, idx, subset0, subset1, subsettype, &network->buf, &network->gradbuf, _state);
23996 0 : *e = 0.0;
23997 0 : for(i=0; i<=wcount-1; i++)
23998 : {
23999 0 : grad->ptr.p_double[i] = 0.0;
24000 : }
24001 0 : ae_shared_pool_first_recycled(&network->gradbuf, &_sgrad, _state);
24002 0 : while(sgrad!=NULL)
24003 : {
24004 0 : *e = *e+sgrad->f;
24005 0 : for(i=0; i<=wcount-1; i++)
24006 : {
24007 0 : grad->ptr.p_double[i] = grad->ptr.p_double[i]+sgrad->g.ptr.p_double[i];
24008 : }
24009 0 : ae_shared_pool_next_recycled(&network->gradbuf, &_sgrad, _state);
24010 : }
24011 0 : ae_frame_leave(_state);
24012 0 : }
24013 :
24014 :
24015 : /*************************************************************************
24016 : Internal function which actually calculates batch gradient for a subset or
24017 : full dataset, which can be represented in different formats.
24018 :
24019 : THIS FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!
24020 :
24021 : -- ALGLIB --
24022 : Copyright 26.07.2012 by Bochkanov Sergey
24023 : *************************************************************************/
24024 0 : void mlpgradbatchx(multilayerperceptron* network,
24025 : /* Real */ ae_matrix* densexy,
24026 : sparsematrix* sparsexy,
24027 : ae_int_t datasetsize,
24028 : ae_int_t datasettype,
24029 : /* Integer */ ae_vector* idx,
24030 : ae_int_t subset0,
24031 : ae_int_t subset1,
24032 : ae_int_t subsettype,
24033 : ae_shared_pool* buf,
24034 : ae_shared_pool* gradbuf,
24035 : ae_state *_state)
24036 : {
24037 : ae_frame _frame_block;
24038 : ae_int_t nin;
24039 : ae_int_t nout;
24040 : ae_int_t wcount;
24041 : ae_int_t rowsize;
24042 : ae_int_t srcidx;
24043 : ae_int_t cstart;
24044 : ae_int_t csize;
24045 : ae_int_t j;
24046 : double problemcost;
24047 : mlpbuffers *buf2;
24048 : ae_smart_ptr _buf2;
24049 : ae_int_t len0;
24050 : ae_int_t len1;
24051 : mlpbuffers *pbuf;
24052 : ae_smart_ptr _pbuf;
24053 : smlpgrad *sgrad;
24054 : ae_smart_ptr _sgrad;
24055 :
24056 0 : ae_frame_make(_state, &_frame_block);
24057 0 : memset(&_buf2, 0, sizeof(_buf2));
24058 0 : memset(&_pbuf, 0, sizeof(_pbuf));
24059 0 : memset(&_sgrad, 0, sizeof(_sgrad));
24060 0 : ae_smart_ptr_init(&_buf2, (void**)&buf2, _state, ae_true);
24061 0 : ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
24062 0 : ae_smart_ptr_init(&_sgrad, (void**)&sgrad, _state, ae_true);
24063 :
24064 0 : ae_assert(datasetsize>=0, "MLPGradBatchX: SetSize<0", _state);
24065 0 : ae_assert(datasettype==0||datasettype==1, "MLPGradBatchX: DatasetType is incorrect", _state);
24066 0 : ae_assert(subsettype==0||subsettype==1, "MLPGradBatchX: SubsetType is incorrect", _state);
24067 :
24068 : /*
24069 : * Determine network and dataset properties
24070 : */
24071 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
24072 0 : if( mlpissoftmax(network, _state) )
24073 : {
24074 0 : rowsize = nin+1;
24075 : }
24076 : else
24077 : {
24078 0 : rowsize = nin+nout;
24079 : }
24080 :
24081 : /*
24082 : * Split problem.
24083 : *
24084 : * Splitting the problem allows us to reduce the effect of single-precision
24085 : * arithmetic (the SSE-optimized version of MLPChunkedGradient uses single
24086 : * precision internally, but converts results to double precision after
24087 : * they are exported from the HPC buffer to the network). Small batches are
24088 : * calculated in single precision and results are aggregated in double
24089 : * precision, which allows us to avoid accumulation of errors when
24090 : * we process very large batches (tens of thousands of items).
24091 : *
24092 : * NOTE: it is important to use real arithmetic for ProblemCost
24093 : * because ProblemCost may be larger than MAXINT.
24094 : */
24095 0 : problemcost = (double)(subset1-subset0);
24096 0 : problemcost = problemcost*wcount*2;
24097 0 : if( ae_fp_greater_eq(problemcost,smpactivationlevel(_state))&&subset1-subset0>=2*mlpbase_microbatchsize )
24098 : {
24099 0 : if( _trypexec_mlpgradbatchx(network,densexy,sparsexy,datasetsize,datasettype,idx,subset0,subset1,subsettype,buf,gradbuf, _state) )
24100 : {
24101 0 : ae_frame_leave(_state);
24102 0 : return;
24103 : }
24104 : }
24105 0 : if( subset1-subset0>=2*mlpbase_microbatchsize&&ae_fp_greater(problemcost,spawnlevel(_state)) )
24106 : {
24107 0 : splitlength(subset1-subset0, mlpbase_microbatchsize, &len0, &len1, _state);
24108 0 : mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0+len0, subsettype, buf, gradbuf, _state);
24109 0 : mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0+len0, subset1, subsettype, buf, gradbuf, _state);
24110 0 : ae_frame_leave(_state);
24111 0 : return;
24112 : }
24113 :
24114 : /*
24115 : * Chunked processing
24116 : */
24117 0 : ae_shared_pool_retrieve(gradbuf, &_sgrad, _state);
24118 0 : ae_shared_pool_retrieve(buf, &_pbuf, _state);
24119 0 : hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
24120 0 : cstart = subset0;
24121 0 : while(cstart<subset1)
24122 : {
24123 :
24124 : /*
24125 : * Determine size of current chunk and copy it to PBuf.XY
24126 : */
24127 0 : csize = ae_minint(subset1, cstart+pbuf->chunksize, _state)-cstart;
24128 0 : for(j=0; j<=csize-1; j++)
24129 : {
24130 0 : srcidx = -1;
24131 0 : if( subsettype==0 )
24132 : {
24133 0 : srcidx = cstart+j;
24134 : }
24135 0 : if( subsettype==1 )
24136 : {
24137 0 : srcidx = idx->ptr.p_int[cstart+j];
24138 : }
24139 0 : ae_assert(srcidx>=0, "MLPGradBatchX: internal error", _state);
24140 0 : if( datasettype==0 )
24141 : {
24142 0 : ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,rowsize-1));
24143 : }
24144 0 : if( datasettype==1 )
24145 : {
24146 0 : sparsegetrow(sparsexy, srcidx, &pbuf->xyrow, _state);
24147 0 : ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &pbuf->xyrow.ptr.p_double[0], 1, ae_v_len(0,rowsize-1));
24148 : }
24149 : }
24150 :
24151 : /*
24152 : * Process chunk and advance line pointer
24153 : */
24154 0 : mlpbase_mlpchunkedgradient(network, &pbuf->xy, 0, csize, &pbuf->batch4buf, &pbuf->hpcbuf, &sgrad->f, ae_false, _state);
24155 0 : cstart = cstart+pbuf->chunksize;
24156 : }
24157 0 : hpcfinalizechunkedgradient(pbuf, &sgrad->g, _state);
24158 0 : ae_shared_pool_recycle(buf, &_pbuf, _state);
24159 0 : ae_shared_pool_recycle(gradbuf, &_sgrad, _state);
24160 0 : ae_frame_leave(_state);
24161 : }
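/*************************************************************************
Editor's note: a self-contained demonstration (added for illustration) of
the accumulation error that the batch-splitting logic above guards
against. Summing many small terms in one long single-precision
accumulation loses low-order bits, while accumulating per-chunk partial
sums into a double-precision total preserves them.
*************************************************************************/
static double example_chunked_accumulation(void)
{
    float  naive = 0.0f;    /* one long single-precision accumulation   */
    double chunked = 0.0;   /* double-precision sum of per-chunk totals */
    int i, j;

    for(i=0; i<1000; i++)
    {
        float partial = 0.0f;
        for(j=0; j<1000; j++)
        {
            naive   += 1e-4f;   /* 10^6 float additions: visible drift  */
            partial += 1e-4f;   /* only 10^3 additions per chunk        */
        }
        chunked += (double)partial;
    }

    /* chunked is much closer to the exact value 100.0 than naive */
    return chunked-(double)naive;
}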
24162 :
24163 :
24164 : /*************************************************************************
24165 : Serial stub for GPL edition.
24166 : *************************************************************************/
24167 0 : ae_bool _trypexec_mlpgradbatchx(multilayerperceptron* network,
24168 : /* Real */ ae_matrix* densexy,
24169 : sparsematrix* sparsexy,
24170 : ae_int_t datasetsize,
24171 : ae_int_t datasettype,
24172 : /* Integer */ ae_vector* idx,
24173 : ae_int_t subset0,
24174 : ae_int_t subset1,
24175 : ae_int_t subsettype,
24176 : ae_shared_pool* buf,
24177 : ae_shared_pool* gradbuf,
24178 : ae_state *_state)
24179 : {
24180 0 : return ae_false;
24181 : }
24182 :
24183 :
24184 : /*************************************************************************
24185 : Batch gradient calculation for a set of inputs/outputs
24186 : (natural error function is used)
24187 :
24188 : INPUT PARAMETERS:
24189 : Network - network initialized with one of the network creation funcs
24190 : XY - set of inputs/outputs; one sample = one row;
24191 : first NIn columns contain inputs,
24192 : next NOut columns - desired outputs.
24193 : SSize - number of elements in XY
24194 : Grad - possibly preallocated array. If size of array is smaller
24195 : than WCount, it will be reallocated. It is recommended to
24196 : reuse previously allocated array to reduce allocation
24197 : overhead.
24198 :
24199 : OUTPUT PARAMETERS:
24200 : E - error function, sum-of-squares for regression networks,
24201 : cross-entropy for classification networks.
24202 : Grad - gradient of E with respect to weights of network, array[WCount]
24203 :
24204 : -- ALGLIB --
24205 : Copyright 04.11.2007 by Bochkanov Sergey
24206 : *************************************************************************/
24207 0 : void mlpgradnbatch(multilayerperceptron* network,
24208 : /* Real */ ae_matrix* xy,
24209 : ae_int_t ssize,
24210 : double* e,
24211 : /* Real */ ae_vector* grad,
24212 : ae_state *_state)
24213 : {
24214 : ae_frame _frame_block;
24215 : ae_int_t i;
24216 : ae_int_t nin;
24217 : ae_int_t nout;
24218 : ae_int_t wcount;
24219 : mlpbuffers *pbuf;
24220 : ae_smart_ptr _pbuf;
24221 :
24222 0 : ae_frame_make(_state, &_frame_block);
24223 0 : memset(&_pbuf, 0, sizeof(_pbuf));
24224 0 : *e = 0;
24225 0 : ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
24226 :
24227 :
24228 : /*
24229 : * Alloc
24230 : */
24231 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
24232 0 : ae_shared_pool_retrieve(&network->buf, &_pbuf, _state);
24233 0 : hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
24234 0 : rvectorsetlengthatleast(grad, wcount, _state);
24235 0 : for(i=0; i<=wcount-1; i++)
24236 : {
24237 0 : grad->ptr.p_double[i] = (double)(0);
24238 : }
24239 0 : *e = (double)(0);
24240 0 : i = 0;
24241 0 : while(i<=ssize-1)
24242 : {
24243 0 : mlpbase_mlpchunkedgradient(network, xy, i, ae_minint(ssize, i+pbuf->chunksize, _state)-i, &pbuf->batch4buf, &pbuf->hpcbuf, e, ae_true, _state);
24244 0 : i = i+pbuf->chunksize;
24245 : }
24246 0 : hpcfinalizechunkedgradient(pbuf, grad, _state);
24247 0 : ae_shared_pool_recycle(&network->buf, &_pbuf, _state);
24248 0 : ae_frame_leave(_state);
24249 0 : }
24250 :
24251 :
24252 : /*************************************************************************
24253 : Batch Hessian calculation (natural error function) using R-algorithm.
24254 : Internal subroutine.
24255 :
24256 : -- ALGLIB --
24257 : Copyright 26.01.2008 by Bochkanov Sergey.
24258 :
24259 : Hessian calculation based on R-algorithm described in
24260 : "Fast Exact Multiplication by the Hessian",
24261 : B. A. Pearlmutter,
24262 : Neural Computation, 1994.
24263 : *************************************************************************/
24264 0 : void mlphessiannbatch(multilayerperceptron* network,
24265 : /* Real */ ae_matrix* xy,
24266 : ae_int_t ssize,
24267 : double* e,
24268 : /* Real */ ae_vector* grad,
24269 : /* Real */ ae_matrix* h,
24270 : ae_state *_state)
24271 : {
24272 :
24273 0 : *e = 0;
24274 :
24275 0 : mlpbase_mlphessianbatchinternal(network, xy, ssize, ae_true, e, grad, h, _state);
24276 0 : }
24277 :
24278 :
24279 : /*************************************************************************
24280 : Batch Hessian calculation using R-algorithm.
24281 : Internal subroutine.
24282 :
24283 : -- ALGLIB --
24284 : Copyright 26.01.2008 by Bochkanov Sergey.
24285 :
24286 : Hessian calculation based on R-algorithm described in
24287 : "Fast Exact Multiplication by the Hessian",
24288 : B. A. Pearlmutter,
24289 : Neural Computation, 1994.
24290 : *************************************************************************/
24291 0 : void mlphessianbatch(multilayerperceptron* network,
24292 : /* Real */ ae_matrix* xy,
24293 : ae_int_t ssize,
24294 : double* e,
24295 : /* Real */ ae_vector* grad,
24296 : /* Real */ ae_matrix* h,
24297 : ae_state *_state)
24298 : {
24299 :
24300 0 : *e = 0;
24301 :
24302 0 : mlpbase_mlphessianbatchinternal(network, xy, ssize, ae_false, e, grad, h, _state);
24303 0 : }
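/*************************************************************************
Editor's note (added): the routines above compute the exact Hessian via
Pearlmutter's R-algorithm. A cheap sanity check, sketched below on the
assumption that direct access to Network->Weights is acceptable (this
file accesses that field freely), compares column K of H against a
central difference of MLPGradNBatch gradients:
H[.][K] ~ (g(w+h*e_K)-g(w-h*e_K))/(2h). K must lie in [0,WCount) and
HColumn must be preallocated to length WCount.
*************************************************************************/
static void example_check_hessian_column(multilayerperceptron* net,
    /* Real */ ae_matrix* xy,
    ae_int_t ssize,
    ae_int_t k,
    /* Real */ ae_vector* hcolumn,
    ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector gplus, gminus;
    double e;
    double h = 1.0e-4;
    ae_int_t i;
    ae_int_t nin;
    ae_int_t nout;
    ae_int_t wcount;

    ae_frame_make(_state, &_frame_block);
    mlpproperties(net, &nin, &nout, &wcount, _state);
    ae_vector_init(&gplus, wcount, DT_REAL, _state, ae_true);
    ae_vector_init(&gminus, wcount, DT_REAL, _state, ae_true);

    net->weights.ptr.p_double[k] = net->weights.ptr.p_double[k]+h;
    mlpgradnbatch(net, xy, ssize, &e, &gplus, _state);
    net->weights.ptr.p_double[k] = net->weights.ptr.p_double[k]-2*h;
    mlpgradnbatch(net, xy, ssize, &e, &gminus, _state);
    net->weights.ptr.p_double[k] = net->weights.ptr.p_double[k]+h;  /* restore */

    for(i=0; i<=wcount-1; i++)
    {
        hcolumn->ptr.p_double[i] = (gplus.ptr.p_double[i]-gminus.ptr.p_double[i])/(2*h);
    }
    ae_frame_leave(_state);
}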
24304 :
24305 :
24306 : /*************************************************************************
24307 : Internal subroutine, shouldn't be called by user.
24308 : *************************************************************************/
24309 0 : void mlpinternalprocessvector(/* Integer */ ae_vector* structinfo,
24310 : /* Real */ ae_vector* weights,
24311 : /* Real */ ae_vector* columnmeans,
24312 : /* Real */ ae_vector* columnsigmas,
24313 : /* Real */ ae_vector* neurons,
24314 : /* Real */ ae_vector* dfdnet,
24315 : /* Real */ ae_vector* x,
24316 : /* Real */ ae_vector* y,
24317 : ae_state *_state)
24318 : {
24319 : ae_int_t i;
24320 : ae_int_t n1;
24321 : ae_int_t n2;
24322 : ae_int_t w1;
24323 : ae_int_t w2;
24324 : ae_int_t ntotal;
24325 : ae_int_t nin;
24326 : ae_int_t nout;
24327 : ae_int_t istart;
24328 : ae_int_t offs;
24329 : double net;
24330 : double f;
24331 : double df;
24332 : double d2f;
24333 : double mx;
24334 : ae_bool perr;
24335 :
24336 :
24337 :
24338 : /*
24339 : * Read network geometry
24340 : */
24341 0 : nin = structinfo->ptr.p_int[1];
24342 0 : nout = structinfo->ptr.p_int[2];
24343 0 : ntotal = structinfo->ptr.p_int[3];
24344 0 : istart = structinfo->ptr.p_int[5];
24345 :
24346 : /*
24347 : * Input standardisation and loading into the network
24348 : */
24349 0 : for(i=0; i<=nin-1; i++)
24350 : {
24351 0 : if( ae_fp_neq(columnsigmas->ptr.p_double[i],(double)(0)) )
24352 : {
24353 0 : neurons->ptr.p_double[i] = (x->ptr.p_double[i]-columnmeans->ptr.p_double[i])/columnsigmas->ptr.p_double[i];
24354 : }
24355 : else
24356 : {
24357 0 : neurons->ptr.p_double[i] = x->ptr.p_double[i]-columnmeans->ptr.p_double[i];
24358 : }
24359 : }
24360 :
24361 : /*
24362 : * Process network
24363 : */
24364 0 : for(i=0; i<=ntotal-1; i++)
24365 : {
24366 0 : offs = istart+i*mlpbase_nfieldwidth;
24367 0 : if( structinfo->ptr.p_int[offs+0]>0||structinfo->ptr.p_int[offs+0]==-5 )
24368 : {
24369 :
24370 : /*
24371 : * Activation function
24372 : */
24373 0 : mlpactivationfunction(neurons->ptr.p_double[structinfo->ptr.p_int[offs+2]], structinfo->ptr.p_int[offs+0], &f, &df, &d2f, _state);
24374 0 : neurons->ptr.p_double[i] = f;
24375 0 : dfdnet->ptr.p_double[i] = df;
24376 0 : continue;
24377 : }
24378 0 : if( structinfo->ptr.p_int[offs+0]==0 )
24379 : {
24380 :
24381 : /*
24382 : * Adaptive summator
24383 : */
24384 0 : n1 = structinfo->ptr.p_int[offs+2];
24385 0 : n2 = n1+structinfo->ptr.p_int[offs+1]-1;
24386 0 : w1 = structinfo->ptr.p_int[offs+3];
24387 0 : w2 = w1+structinfo->ptr.p_int[offs+1]-1;
24388 0 : net = ae_v_dotproduct(&weights->ptr.p_double[w1], 1, &neurons->ptr.p_double[n1], 1, ae_v_len(w1,w2));
24389 0 : neurons->ptr.p_double[i] = net;
24390 0 : dfdnet->ptr.p_double[i] = 1.0;
24391 0 : touchint(&n2, _state);
24392 0 : continue;
24393 : }
24394 0 : if( structinfo->ptr.p_int[offs+0]<0 )
24395 : {
24396 0 : perr = ae_true;
24397 0 : if( structinfo->ptr.p_int[offs+0]==-2 )
24398 : {
24399 :
24400 : /*
24401 : * input neuron, left unchanged
24402 : */
24403 0 : perr = ae_false;
24404 : }
24405 0 : if( structinfo->ptr.p_int[offs+0]==-3 )
24406 : {
24407 :
24408 : /*
24409 : * "-1" neuron
24410 : */
24411 0 : neurons->ptr.p_double[i] = (double)(-1);
24412 0 : perr = ae_false;
24413 : }
24414 0 : if( structinfo->ptr.p_int[offs+0]==-4 )
24415 : {
24416 :
24417 : /*
24418 : * "0" neuron
24419 : */
24420 0 : neurons->ptr.p_double[i] = (double)(0);
24421 0 : perr = ae_false;
24422 : }
24423 0 : ae_assert(!perr, "MLPInternalProcessVector: internal error - unknown neuron type!", _state);
24424 0 : continue;
24425 : }
24426 : }
24427 :
24428 : /*
24429 : * Extract result
24430 : */
24431 0 : ae_v_move(&y->ptr.p_double[0], 1, &neurons->ptr.p_double[ntotal-nout], 1, ae_v_len(0,nout-1));
24432 :
24433 : /*
24434 : * Softmax post-processing or standardisation if needed
24435 : */
24436 0 : ae_assert(structinfo->ptr.p_int[6]==0||structinfo->ptr.p_int[6]==1, "MLPInternalProcessVector: unknown normalization type!", _state);
24437 0 : if( structinfo->ptr.p_int[6]==1 )
24438 : {
24439 :
24440 : /*
24441 : * Softmax
24442 : */
24443 0 : mx = y->ptr.p_double[0];
24444 0 : for(i=1; i<=nout-1; i++)
24445 : {
24446 0 : mx = ae_maxreal(mx, y->ptr.p_double[i], _state);
24447 : }
24448 0 : net = (double)(0);
24449 0 : for(i=0; i<=nout-1; i++)
24450 : {
24451 0 : y->ptr.p_double[i] = ae_exp(y->ptr.p_double[i]-mx, _state);
24452 0 : net = net+y->ptr.p_double[i];
24453 : }
24454 0 : for(i=0; i<=nout-1; i++)
24455 : {
24456 0 : y->ptr.p_double[i] = y->ptr.p_double[i]/net;
24457 : }
24458 : }
24459 : else
24460 : {
24461 :
24462 : /*
24463 : * Standardisation
24464 : */
24465 0 : for(i=0; i<=nout-1; i++)
24466 : {
24467 0 : y->ptr.p_double[i] = y->ptr.p_double[i]*columnsigmas->ptr.p_double[nin+i]+columnmeans->ptr.p_double[nin+i];
24468 : }
24469 : }
24470 0 : }
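/*************************************************************************
Editor's note (added): the softmax branch above subtracts the maximum
output before exponentiation. This is the standard max-shift trick:
softmax is invariant under a common shift of its inputs, and shifting by
the maximum keeps every exponent argument <= 0, so the exponential cannot
overflow for large activations. A self-contained restatement (assumes
exp() from <math.h>):
*************************************************************************/
static void example_stable_softmax(double* y, int n)
{
    double mx;
    double s;
    int i;

    mx = y[0];
    for(i=1; i<n; i++)
    {
        mx = y[i]>mx ? y[i] : mx;
    }
    s = 0.0;
    for(i=0; i<n; i++)
    {
        y[i] = exp(y[i]-mx);    /* argument <= 0: no overflow possible */
        s = s+y[i];
    }
    for(i=0; i<n; i++)
    {
        y[i] = y[i]/s;          /* outputs are positive and sum to 1 */
    }
}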
24471 :
24472 :
24473 : /*************************************************************************
24474 : Serializer: allocation
24475 :
24476 : -- ALGLIB --
24477 : Copyright 14.03.2011 by Bochkanov Sergey
24478 : *************************************************************************/
24479 0 : void mlpalloc(ae_serializer* s,
24480 : multilayerperceptron* network,
24481 : ae_state *_state)
24482 : {
24483 : ae_int_t i;
24484 : ae_int_t j;
24485 : ae_int_t k;
24486 : ae_int_t fkind;
24487 : double threshold;
24488 : double v0;
24489 : double v1;
24490 : ae_int_t nin;
24491 : ae_int_t nout;
24492 :
24493 :
24494 0 : nin = network->hllayersizes.ptr.p_int[0];
24495 0 : nout = network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1];
24496 0 : ae_serializer_alloc_entry(s);
24497 0 : ae_serializer_alloc_entry(s);
24498 0 : ae_serializer_alloc_entry(s);
24499 0 : allocintegerarray(s, &network->hllayersizes, -1, _state);
24500 0 : for(i=1; i<=network->hllayersizes.cnt-1; i++)
24501 : {
24502 0 : for(j=0; j<=network->hllayersizes.ptr.p_int[i]-1; j++)
24503 : {
24504 0 : mlpgetneuroninfo(network, i, j, &fkind, &threshold, _state);
24505 0 : ae_serializer_alloc_entry(s);
24506 0 : ae_serializer_alloc_entry(s);
24507 0 : for(k=0; k<=network->hllayersizes.ptr.p_int[i-1]-1; k++)
24508 : {
24509 0 : ae_serializer_alloc_entry(s);
24510 : }
24511 : }
24512 : }
24513 0 : for(j=0; j<=nin-1; j++)
24514 : {
24515 0 : mlpgetinputscaling(network, j, &v0, &v1, _state);
24516 0 : ae_serializer_alloc_entry(s);
24517 0 : ae_serializer_alloc_entry(s);
24518 : }
24519 0 : for(j=0; j<=nout-1; j++)
24520 : {
24521 0 : mlpgetoutputscaling(network, j, &v0, &v1, _state);
24522 0 : ae_serializer_alloc_entry(s);
24523 0 : ae_serializer_alloc_entry(s);
24524 : }
24525 0 : }
24526 :
24527 :
24528 : /*************************************************************************
24529 : Serializer: serialization
24530 :
24531 : -- ALGLIB --
24532 : Copyright 14.03.2011 by Bochkanov Sergey
24533 : *************************************************************************/
24534 0 : void mlpserialize(ae_serializer* s,
24535 : multilayerperceptron* network,
24536 : ae_state *_state)
24537 : {
24538 : ae_int_t i;
24539 : ae_int_t j;
24540 : ae_int_t k;
24541 : ae_int_t fkind;
24542 : double threshold;
24543 : double v0;
24544 : double v1;
24545 : ae_int_t nin;
24546 : ae_int_t nout;
24547 :
24548 :
24549 0 : nin = network->hllayersizes.ptr.p_int[0];
24550 0 : nout = network->hllayersizes.ptr.p_int[network->hllayersizes.cnt-1];
24551 0 : ae_serializer_serialize_int(s, getmlpserializationcode(_state), _state);
24552 0 : ae_serializer_serialize_int(s, mlpbase_mlpfirstversion, _state);
24553 0 : ae_serializer_serialize_bool(s, mlpissoftmax(network, _state), _state);
24554 0 : serializeintegerarray(s, &network->hllayersizes, -1, _state);
24555 0 : for(i=1; i<=network->hllayersizes.cnt-1; i++)
24556 : {
24557 0 : for(j=0; j<=network->hllayersizes.ptr.p_int[i]-1; j++)
24558 : {
24559 0 : mlpgetneuroninfo(network, i, j, &fkind, &threshold, _state);
24560 0 : ae_serializer_serialize_int(s, fkind, _state);
24561 0 : ae_serializer_serialize_double(s, threshold, _state);
24562 0 : for(k=0; k<=network->hllayersizes.ptr.p_int[i-1]-1; k++)
24563 : {
24564 0 : ae_serializer_serialize_double(s, mlpgetweight(network, i-1, k, i, j, _state), _state);
24565 : }
24566 : }
24567 : }
24568 0 : for(j=0; j<=nin-1; j++)
24569 : {
24570 0 : mlpgetinputscaling(network, j, &v0, &v1, _state);
24571 0 : ae_serializer_serialize_double(s, v0, _state);
24572 0 : ae_serializer_serialize_double(s, v1, _state);
24573 : }
24574 0 : for(j=0; j<=nout-1; j++)
24575 : {
24576 0 : mlpgetoutputscaling(network, j, &v0, &v1, _state);
24577 0 : ae_serializer_serialize_double(s, v0, _state);
24578 0 : ae_serializer_serialize_double(s, v1, _state);
24579 : }
24580 0 : }
24581 :
24582 :
24583 : /*************************************************************************
24584 : Serializer: unserialization
24585 :
24586 : -- ALGLIB --
24587 : Copyright 14.03.2011 by Bochkanov Sergey
24588 : *************************************************************************/
24589 0 : void mlpunserialize(ae_serializer* s,
24590 : multilayerperceptron* network,
24591 : ae_state *_state)
24592 : {
24593 : ae_frame _frame_block;
24594 : ae_int_t i0;
24595 : ae_int_t i1;
24596 : ae_int_t i;
24597 : ae_int_t j;
24598 : ae_int_t k;
24599 : ae_int_t fkind;
24600 : double threshold;
24601 : double v0;
24602 : double v1;
24603 : ae_int_t nin;
24604 : ae_int_t nout;
24605 : ae_bool issoftmax;
24606 : ae_vector layersizes;
24607 :
24608 0 : ae_frame_make(_state, &_frame_block);
24609 0 : memset(&layersizes, 0, sizeof(layersizes));
24610 0 : _multilayerperceptron_clear(network);
24611 0 : ae_vector_init(&layersizes, 0, DT_INT, _state, ae_true);
24612 :
24613 :
24614 : /*
24615 : * check correctness of header
24616 : */
24617 0 : ae_serializer_unserialize_int(s, &i0, _state);
24618 0 : ae_assert(i0==getmlpserializationcode(_state), "MLPUnserialize: stream header corrupted", _state);
24619 0 : ae_serializer_unserialize_int(s, &i1, _state);
24620 0 : ae_assert(i1==mlpbase_mlpfirstversion, "MLPUnserialize: stream header corrupted", _state);
24621 :
24622 : /*
24623 : * Create network
24624 : */
24625 0 : ae_serializer_unserialize_bool(s, &issoftmax, _state);
24626 0 : unserializeintegerarray(s, &layersizes, _state);
24627 0 : ae_assert((layersizes.cnt==2||layersizes.cnt==3)||layersizes.cnt==4, "MLPUnserialize: too many hidden layers!", _state);
24628 0 : nin = layersizes.ptr.p_int[0];
24629 0 : nout = layersizes.ptr.p_int[layersizes.cnt-1];
24630 0 : if( layersizes.cnt==2 )
24631 : {
24632 0 : if( issoftmax )
24633 : {
24634 0 : mlpcreatec0(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], network, _state);
24635 : }
24636 : else
24637 : {
24638 0 : mlpcreate0(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], network, _state);
24639 : }
24640 : }
24641 0 : if( layersizes.cnt==3 )
24642 : {
24643 0 : if( issoftmax )
24644 : {
24645 0 : mlpcreatec1(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], network, _state);
24646 : }
24647 : else
24648 : {
24649 0 : mlpcreate1(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], network, _state);
24650 : }
24651 : }
24652 0 : if( layersizes.cnt==4 )
24653 : {
24654 0 : if( issoftmax )
24655 : {
24656 0 : mlpcreatec2(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], layersizes.ptr.p_int[3], network, _state);
24657 : }
24658 : else
24659 : {
24660 0 : mlpcreate2(layersizes.ptr.p_int[0], layersizes.ptr.p_int[1], layersizes.ptr.p_int[2], layersizes.ptr.p_int[3], network, _state);
24661 : }
24662 : }
24663 :
24664 : /*
24665 : * Load neurons and weights
24666 : */
24667 0 : for(i=1; i<=layersizes.cnt-1; i++)
24668 : {
24669 0 : for(j=0; j<=layersizes.ptr.p_int[i]-1; j++)
24670 : {
24671 0 : ae_serializer_unserialize_int(s, &fkind, _state);
24672 0 : ae_serializer_unserialize_double(s, &threshold, _state);
24673 0 : mlpsetneuroninfo(network, i, j, fkind, threshold, _state);
24674 0 : for(k=0; k<=layersizes.ptr.p_int[i-1]-1; k++)
24675 : {
24676 0 : ae_serializer_unserialize_double(s, &v0, _state);
24677 0 : mlpsetweight(network, i-1, k, i, j, v0, _state);
24678 : }
24679 : }
24680 : }
24681 :
24682 : /*
24683 : * Load standardizer
24684 : */
24685 0 : for(j=0; j<=nin-1; j++)
24686 : {
24687 0 : ae_serializer_unserialize_double(s, &v0, _state);
24688 0 : ae_serializer_unserialize_double(s, &v1, _state);
24689 0 : mlpsetinputscaling(network, j, v0, v1, _state);
24690 : }
24691 0 : for(j=0; j<=nout-1; j++)
24692 : {
24693 0 : ae_serializer_unserialize_double(s, &v0, _state);
24694 0 : ae_serializer_unserialize_double(s, &v1, _state);
24695 0 : mlpsetoutputscaling(network, j, v0, v1, _state);
24696 : }
24697 0 : ae_frame_leave(_state);
24698 0 : }
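/*************************************************************************
Editor's note: a round-trip sketch (added for illustration). In ALGLIB's
public C++ interface the alloc/serialize/unserialize protocol above is
wrapped by string-based helpers; this sketch assumes the wrappers
alglib::mlpserialize() and alglib::mlpunserialize() with the signatures
shown - check the headers shipped with your ALGLIB version before relying
on them.
*************************************************************************/
#if 0
static void example_mlp_roundtrip()
{
    alglib::multilayerperceptron net;
    alglib::multilayerperceptron restored;
    std::string s;

    alglib::mlpcreate1(2, 5, 1, net);      /* 2 -> 5 -> 1 regression net  */
    alglib::mlpserialize(net, s);          /* network -> portable string  */
    alglib::mlpunserialize(s, restored);   /* string -> identical network */
}
#endif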
24699 :
24700 :
24701 : /*************************************************************************
24702 : Calculation of all types of errors on subset of dataset.
24703 :
24704 : ! COMMERCIAL EDITION OF ALGLIB:
24705 : !
24706 : ! The Commercial Edition of ALGLIB includes the following important
24707 : ! improvements of this function:
24708 : ! * high-performance native backend with the same C# interface (C# version)
24709 : ! * multithreading support (C++ and C# versions)
24710 : !
24711 : ! We recommend that you read the 'Working with commercial version' section
24712 : ! of the ALGLIB Reference Manual to find out how to use the performance-
24713 : ! related features provided by the commercial edition of ALGLIB.
24714 :
24715 : INPUT PARAMETERS:
24716 : Network - network initialized with one of the network creation funcs
24717 : XY - original dataset; one sample = one row;
24718 : first NIn columns contain inputs,
24719 : next NOut columns - desired outputs.
24720 : SetSize - real size of XY, SetSize>=0;
24721 : Subset - subset of SubsetSize elements, array[SubsetSize];
24722 : SubsetSize- number of elements in Subset[] array:
24723 : * if SubsetSize>0, rows of XY with indices Subset[0]...
24724 : ...Subset[SubsetSize-1] are processed
24725 : * if SubsetSize=0, zeros are returned
24726 : * if SubsetSize<0, entire dataset is processed; Subset[]
24727 : array is ignored in this case.
24728 :
24729 : OUTPUT PARAMETERS:
24730 : Rep - it contains all types of errors.
24731 :
24732 : -- ALGLIB --
24733 : Copyright 04.09.2012 by Bochkanov Sergey
24734 : *************************************************************************/
24735 0 : void mlpallerrorssubset(multilayerperceptron* network,
24736 : /* Real */ ae_matrix* xy,
24737 : ae_int_t setsize,
24738 : /* Integer */ ae_vector* subset,
24739 : ae_int_t subsetsize,
24740 : modelerrors* rep,
24741 : ae_state *_state)
24742 : {
24743 : ae_int_t idx0;
24744 : ae_int_t idx1;
24745 : ae_int_t idxtype;
24746 :
24747 0 : _modelerrors_clear(rep);
24748 :
24749 0 : ae_assert(xy->rows>=setsize, "MLPAllErrorsSubset: XY has less than SetSize rows", _state);
24750 0 : if( setsize>0 )
24751 : {
24752 0 : if( mlpissoftmax(network, _state) )
24753 : {
24754 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPAllErrorsSubset: XY has less than NIn+1 columns", _state);
24755 : }
24756 : else
24757 : {
24758 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAllErrorsSubset: XY has less than NIn+NOut columns", _state);
24759 : }
24760 : }
24761 0 : if( subsetsize>=0 )
24762 : {
24763 0 : idx0 = 0;
24764 0 : idx1 = subsetsize;
24765 0 : idxtype = 1;
24766 : }
24767 : else
24768 : {
24769 0 : idx0 = 0;
24770 0 : idx1 = setsize;
24771 0 : idxtype = 0;
24772 : }
24773 0 : mlpallerrorsx(network, xy, &network->dummysxy, setsize, 0, subset, idx0, idx1, idxtype, &network->buf, rep, _state);
24774 0 : }
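/*************************************************************************
Editor's note: a minimal sketch (added) of the SubsetSize convention
documented above: passing SubsetSize<0 processes the entire dataset and
ignores Subset[], so an empty index vector suffices.
*************************************************************************/
static void example_all_errors_full(multilayerperceptron* net,
    /* Real */ ae_matrix* xy,
    ae_int_t setsize,
    modelerrors* rep,
    ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector subset;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&subset, 0, DT_INT, _state, ae_true);   /* ignored below */
    mlpallerrorssubset(net, xy, setsize, &subset, -1, rep, _state);
    /* Rep now holds RMS/avg/avgrel/classification errors for all rows */
    ae_frame_leave(_state);
}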
24775 :
24776 :
24777 : /*************************************************************************
24778 : Calculation of all types of errors on subset of dataset.
24779 :
24780 : ! COMMERCIAL EDITION OF ALGLIB:
24781 : !
24782 : ! The Commercial Edition of ALGLIB includes the following important
24783 : ! improvements of this function:
24784 : ! * high-performance native backend with the same C# interface (C# version)
24785 : ! * multithreading support (C++ and C# versions)
24786 : !
24787 : ! We recommend that you read the 'Working with commercial version' section
24788 : ! of the ALGLIB Reference Manual to find out how to use the performance-
24789 : ! related features provided by the commercial edition of ALGLIB.
24790 :
24791 : INPUT PARAMETERS:
24792 : Network - network initialized with one of the network creation funcs
24793 : XY - original dataset given by sparse matrix;
24794 : one sample = one row;
24795 : first NIn columns contain inputs,
24796 : next NOut columns - desired outputs.
24797 : SetSize - real size of XY, SetSize>=0;
24798 : Subset - subset of SubsetSize elements, array[SubsetSize];
24799 : SubsetSize- number of elements in Subset[] array:
24800 : * if SubsetSize>0, rows of XY with indices Subset[0]...
24801 : ...Subset[SubsetSize-1] are processed
24802 : * if SubsetSize=0, zeros are returned
24803 : * if SubsetSize<0, entire dataset is processed; Subset[]
24804 : array is ignored in this case.
24805 :
24806 : OUTPUT PARAMETERS:
24807 : Rep - it contains all types of errors.
24808 :
24809 :
24810 : -- ALGLIB --
24811 : Copyright 04.09.2012 by Bochkanov Sergey
24812 : *************************************************************************/
24813 0 : void mlpallerrorssparsesubset(multilayerperceptron* network,
24814 : sparsematrix* xy,
24815 : ae_int_t setsize,
24816 : /* Integer */ ae_vector* subset,
24817 : ae_int_t subsetsize,
24818 : modelerrors* rep,
24819 : ae_state *_state)
24820 : {
24821 : ae_int_t idx0;
24822 : ae_int_t idx1;
24823 : ae_int_t idxtype;
24824 :
24825 0 : _modelerrors_clear(rep);
24826 :
24827 0 : ae_assert(sparseiscrs(xy, _state), "MLPAllErrorsSparseSubset: XY is not in CRS format.", _state);
24828 0 : ae_assert(sparsegetnrows(xy, _state)>=setsize, "MLPAllErrorsSparseSubset: XY has less than SetSize rows", _state);
24829 0 : if( setsize>0 )
24830 : {
24831 0 : if( mlpissoftmax(network, _state) )
24832 : {
24833 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPAllErrorsSparseSubset: XY has less than NIn+1 columns", _state);
24834 : }
24835 : else
24836 : {
24837 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPAllErrorsSparseSubset: XY has less than NIn+NOut columns", _state);
24838 : }
24839 : }
24840 0 : if( subsetsize>=0 )
24841 : {
24842 0 : idx0 = 0;
24843 0 : idx1 = subsetsize;
24844 0 : idxtype = 1;
24845 : }
24846 : else
24847 : {
24848 0 : idx0 = 0;
24849 0 : idx1 = setsize;
24850 0 : idxtype = 0;
24851 : }
24852 0 : mlpallerrorsx(network, &network->dummydxy, xy, setsize, 1, subset, idx0, idx1, idxtype, &network->buf, rep, _state);
24853 0 : }
24854 :
24855 :
24856 : /*************************************************************************
24857 : Error of the neural network on subset of dataset.
24858 :
24859 : ! COMMERCIAL EDITION OF ALGLIB:
24860 : !
24861 : ! The Commercial Edition of ALGLIB includes the following important
24862 : ! improvements of this function:
24863 : ! * high-performance native backend with the same C# interface (C# version)
24864 : ! * multithreading support (C++ and C# versions)
24865 : !
24866 : ! We recommend that you read the 'Working with commercial version' section
24867 : ! of the ALGLIB Reference Manual to find out how to use the performance-
24868 : ! related features provided by the commercial edition of ALGLIB.
24869 :
24870 : INPUT PARAMETERS:
24871 : Network - neural network;
24872 : XY - training set, see below for information on the
24873 : training set format;
24874 : SetSize - real size of XY, SetSize>=0;
24875 : Subset - subset of SubsetSize elements, array[SubsetSize];
24876 : SubsetSize- number of elements in Subset[] array:
24877 : * if SubsetSize>0, rows of XY with indices Subset[0]...
24878 : ...Subset[SubsetSize-1] are processed
24879 : * if SubsetSize=0, zeros are returned
24880 : * if SubsetSize<0, entire dataset is processed; Subset[]
24881 : array is ignored in this case.
24882 :
24883 : RESULT:
24884 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
24885 :
24886 : DATASET FORMAT:
24887 :
24888 : This function uses two different dataset formats - one for regression
24889 : networks, another one for classification networks.
24890 :
24891 : For regression networks with NIn inputs and NOut outputs the following
24892 : dataset format is used:
24893 : * dataset is given by NPoints*(NIn+NOut) matrix
24894 : * each row corresponds to one example
24895 : * first NIn columns are inputs, next NOut columns are outputs
24896 :
24897 : For classification networks with NIn inputs and NClasses classes the
24898 : following dataset format is used:
24899 : * dataset is given by NPoints*(NIn+1) matrix
24900 : * each row corresponds to one example
24901 : * first NIn columns are inputs, last column stores class number (from 0 to
24902 : NClasses-1).
24903 :
24904 : -- ALGLIB --
24905 : Copyright 04.09.2012 by Bochkanov Sergey
24906 : *************************************************************************/
24907 0 : double mlperrorsubset(multilayerperceptron* network,
24908 : /* Real */ ae_matrix* xy,
24909 : ae_int_t setsize,
24910 : /* Integer */ ae_vector* subset,
24911 : ae_int_t subsetsize,
24912 : ae_state *_state)
24913 : {
24914 : ae_int_t idx0;
24915 : ae_int_t idx1;
24916 : ae_int_t idxtype;
24917 : double result;
24918 :
24919 :
24920 0 : ae_assert(xy->rows>=setsize, "MLPErrorSubset: XY has less than SetSize rows", _state);
24921 0 : if( setsize>0 )
24922 : {
24923 0 : if( mlpissoftmax(network, _state) )
24924 : {
24925 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+1, "MLPErrorSubset: XY has less than NIn+1 columns", _state);
24926 : }
24927 : else
24928 : {
24929 0 : ae_assert(xy->cols>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSubset: XY has less than NIn+NOut columns", _state);
24930 : }
24931 : }
24932 0 : if( subsetsize>=0 )
24933 : {
24934 0 : idx0 = 0;
24935 0 : idx1 = subsetsize;
24936 0 : idxtype = 1;
24937 : }
24938 : else
24939 : {
24940 0 : idx0 = 0;
24941 0 : idx1 = setsize;
24942 0 : idxtype = 0;
24943 : }
24944 0 : mlpallerrorsx(network, xy, &network->dummysxy, setsize, 0, subset, idx0, idx1, idxtype, &network->buf, &network->err, _state);
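    /*
     * MLPAllErrorsX reports the RMS error; the line below recovers the
     * sum-of-squares error from it as E = rms^2*N*NOut/2, where
     * N = Idx1-Idx0 is the number of processed samples.
     */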
24945 0 : result = ae_sqr(network->err.rmserror, _state)*(idx1-idx0)*mlpgetoutputscount(network, _state)/2;
24946 0 : return result;
24947 : }
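
/*
 * Usage sketch for the subset-error API (an illustration only, assuming the
 * usual ALGLIB C++ wrappers mlpcreate0()/mlperrorsubset() declared in
 * dataanalysis.h; it is not part of this translation unit):
 *
 *     alglib::multilayerperceptron net;
 *     alglib::mlpcreate0(2, 1, net);                     // 2 inputs, 1 linear output
 *     alglib::real_2d_array xy = "[[0,0,0],[1,1,2]]";    // each row: NIn inputs + NOut targets
 *     alglib::integer_1d_array subset = "[1]";
 *     double e0 = alglib::mlperrorsubset(net, xy, 2, subset,  1); // row 1 only
 *     double e1 = alglib::mlperrorsubset(net, xy, 2, subset, -1); // whole dataset, Subset[] ignored
 */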
24948 :
24949 :
24950 : /*************************************************************************
24951 : Error of the neural network on subset of sparse dataset.
24952 :
24953 : ! COMMERCIAL EDITION OF ALGLIB:
24954 : !
24955 : ! Commercial Edition of ALGLIB includes the following important improvements
24956 : ! of this function:
24957 : ! * high-performance native backend with the same C# interface (C# version)
24958 : ! * multithreading support (C++ and C# versions)
24959 : !
24960 : ! We recommend that you read the 'Working with commercial version'
24961 : ! section of the ALGLIB Reference Manual to find out how to use the
24962 : ! performance-related features provided by the commercial edition of ALGLIB.
24963 :
24964 : INPUT PARAMETERS:
24965 : Network - neural network;
24966 : XY - training set, see below for information on the
24967 : training set format. This function checks correctness
24968 : of the dataset (no NANs/INFs, class numbers are
24969 : correct) and throws exception when incorrect dataset
24970 : is passed. Sparse matrix must use CRS format for
24971 : storage.
24972 : SetSize - real size of XY, SetSize>=0;
24973 : it is used when SubsetSize<0;
24974 : Subset - subset of SubsetSize elements, array[SubsetSize];
24975 : SubsetSize- number of elements in Subset[] array:
24976 : * if SubsetSize>0, rows of XY with indices Subset[0]...
24977 : ...Subset[SubsetSize-1] are processed
24978 : * if SubsetSize=0, zeros are returned
24979 : * if SubsetSize<0, entire dataset is processed; Subset[]
24980 : array is ignored in this case.
24981 :
24982 : RESULT:
24983 : sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
24984 :
24985 : DATASET FORMAT:
24986 :
24987 : This function uses two different dataset formats - one for regression
24988 : networks, another one for classification networks.
24989 :
24990 : For regression networks with NIn inputs and NOut outputs the following
24991 : dataset format is used:
24992 : * dataset is given by NPoints*(NIn+NOut) matrix
24993 : * each row corresponds to one example
24994 : * first NIn columns are inputs, next NOut columns are outputs
24995 :
24996 : For classification networks with NIn inputs and NClasses classes the
24997 : following dataset format is used:
24998 : * dataset is given by NPoints*(NIn+1) matrix
24999 : * each row corresponds to one example
25000 : * first NIn columns are inputs, last column stores class number (from 0 to
25001 : NClasses-1).
25002 :
25003 : -- ALGLIB --
25004 : Copyright 04.09.2012 by Bochkanov Sergey
25005 : *************************************************************************/
25006 0 : double mlperrorsparsesubset(multilayerperceptron* network,
25007 : sparsematrix* xy,
25008 : ae_int_t setsize,
25009 : /* Integer */ ae_vector* subset,
25010 : ae_int_t subsetsize,
25011 : ae_state *_state)
25012 : {
25013 : ae_int_t idx0;
25014 : ae_int_t idx1;
25015 : ae_int_t idxtype;
25016 : double result;
25017 :
25018 :
25019 0 : ae_assert(sparseiscrs(xy, _state), "MLPErrorSparseSubset: XY is not in CRS format.", _state);
25020 0 : ae_assert(sparsegetnrows(xy, _state)>=setsize, "MLPErrorSparseSubset: XY has less than SetSize rows", _state);
25021 0 : if( setsize>0 )
25022 : {
25023 0 : if( mlpissoftmax(network, _state) )
25024 : {
25025 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+1, "MLPErrorSparseSubset: XY has less than NIn+1 columns", _state);
25026 : }
25027 : else
25028 : {
25029 0 : ae_assert(sparsegetncols(xy, _state)>=mlpgetinputscount(network, _state)+mlpgetoutputscount(network, _state), "MLPErrorSparseSubset: XY has less than NIn+NOut columns", _state);
25030 : }
25031 : }
25032 0 : if( subsetsize>=0 )
25033 : {
25034 0 : idx0 = 0;
25035 0 : idx1 = subsetsize;
25036 0 : idxtype = 1;
25037 : }
25038 : else
25039 : {
25040 0 : idx0 = 0;
25041 0 : idx1 = setsize;
25042 0 : idxtype = 0;
25043 : }
25044 0 : mlpallerrorsx(network, &network->dummydxy, xy, setsize, 1, subset, idx0, idx1, idxtype, &network->buf, &network->err, _state);
25045 0 : result = ae_sqr(network->err.rmserror, _state)*(idx1-idx0)*mlpgetoutputscount(network, _state)/2;
25046 0 : return result;
25047 : }
25048 :
25049 :
25050 : /*************************************************************************
25051 : Calculation of all types of errors at once for a subset or the full dataset,
25052 : which can be represented in different formats.
25053 :
25054 : THIS INTERNAL FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!
25055 :
25056 : -- ALGLIB --
25057 : Copyright 26.07.2012 by Bochkanov Sergey
25058 : *************************************************************************/
25059 0 : void mlpallerrorsx(multilayerperceptron* network,
25060 : /* Real */ ae_matrix* densexy,
25061 : sparsematrix* sparsexy,
25062 : ae_int_t datasetsize,
25063 : ae_int_t datasettype,
25064 : /* Integer */ ae_vector* idx,
25065 : ae_int_t subset0,
25066 : ae_int_t subset1,
25067 : ae_int_t subsettype,
25068 : ae_shared_pool* buf,
25069 : modelerrors* rep,
25070 : ae_state *_state)
25071 : {
25072 : ae_frame _frame_block;
25073 : ae_int_t nin;
25074 : ae_int_t nout;
25075 : ae_int_t wcount;
25076 : ae_int_t rowsize;
25077 : ae_bool iscls;
25078 : ae_int_t srcidx;
25079 : ae_int_t cstart;
25080 : ae_int_t csize;
25081 : ae_int_t j;
25082 : mlpbuffers *pbuf;
25083 : ae_smart_ptr _pbuf;
25084 : ae_int_t len0;
25085 : ae_int_t len1;
25086 : modelerrors rep0;
25087 : modelerrors rep1;
25088 : double problemcost;
25089 :
25090 0 : ae_frame_make(_state, &_frame_block);
25091 0 : memset(&_pbuf, 0, sizeof(_pbuf));
25092 0 : memset(&rep0, 0, sizeof(rep0));
25093 0 : memset(&rep1, 0, sizeof(rep1));
25094 0 : ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
25095 0 : _modelerrors_init(&rep0, _state, ae_true);
25096 0 : _modelerrors_init(&rep1, _state, ae_true);
25097 :
25098 0 : ae_assert(datasetsize>=0, "MLPAllErrorsX: SetSize<0", _state);
25099 0 : ae_assert(datasettype==0||datasettype==1, "MLPAllErrorsX: DatasetType is incorrect", _state);
25100 0 : ae_assert(subsettype==0||subsettype==1, "MLPAllErrorsX: SubsetType is incorrect", _state);
25101 :
25102 : /*
25103 : * Determine network properties
25104 : */
25105 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
25106 0 : iscls = mlpissoftmax(network, _state);
25107 :
25108 : /*
25109 : * Split problem.
25110 : *
25111 : * Splitting the problem allows us to reduce the effect of single-precision
25112 : * arithmetic (the SSE-optimized version of MLPChunkedProcess uses single
25113 : * precision internally, but converts results to double precision after
25114 : * they are exported from the HPC buffer to the network). Small batches are
25115 : * calculated in single precision, results are aggregated in double
25116 : * precision, and this allows us to avoid accumulation of errors when
25117 : * we process very large batches (tens of thousands of items).
25118 : *
25119 : * NOTE: it is important to use real arithmetic for ProblemCost
25120 : * because ProblemCost may be larger than MAXINT.
25121 : */
25122 0 : problemcost = (double)(subset1-subset0);
25123 0 : problemcost = problemcost*wcount*2;
25124 0 : if( ae_fp_greater_eq(problemcost,smpactivationlevel(_state))&&subset1-subset0>=2*mlpbase_microbatchsize )
25125 : {
25126 0 : if( _trypexec_mlpallerrorsx(network,densexy,sparsexy,datasetsize,datasettype,idx,subset0,subset1,subsettype,buf,rep, _state) )
25127 : {
25128 0 : ae_frame_leave(_state);
25129 0 : return;
25130 : }
25131 : }
25132 0 : if( subset1-subset0>=2*mlpbase_microbatchsize&&ae_fp_greater(problemcost,spawnlevel(_state)) )
25133 : {
25134 0 : splitlength(subset1-subset0, mlpbase_microbatchsize, &len0, &len1, _state);
25135 0 : mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0+len0, subsettype, buf, &rep0, _state);
25136 0 : mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0+len0, subset1, subsettype, buf, &rep1, _state);
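    /*
     * Merge the two child reports: the linear error measures are combined
     * as weighted averages, while RMS is merged through the weighted mean
     * of squares, i.e. rms = sqrt((len0*rms0^2+len1*rms1^2)/(len0+len1)).
     */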
25137 0 : rep->relclserror = (len0*rep0.relclserror+len1*rep1.relclserror)/(len0+len1);
25138 0 : rep->avgce = (len0*rep0.avgce+len1*rep1.avgce)/(len0+len1);
25139 0 : rep->rmserror = ae_sqrt((len0*ae_sqr(rep0.rmserror, _state)+len1*ae_sqr(rep1.rmserror, _state))/(len0+len1), _state);
25140 0 : rep->avgerror = (len0*rep0.avgerror+len1*rep1.avgerror)/(len0+len1);
25141 0 : rep->avgrelerror = (len0*rep0.avgrelerror+len1*rep1.avgrelerror)/(len0+len1);
25142 0 : ae_frame_leave(_state);
25143 0 : return;
25144 : }
25145 :
25146 : /*
25147 : * Retrieve and prepare
25148 : */
25149 0 : ae_shared_pool_retrieve(buf, &_pbuf, _state);
25150 0 : if( iscls )
25151 : {
25152 0 : rowsize = nin+1;
25153 0 : dserrallocate(nout, &pbuf->tmp0, _state);
25154 : }
25155 : else
25156 : {
25157 0 : rowsize = nin+nout;
25158 0 : dserrallocate(-nout, &pbuf->tmp0, _state);
25159 : }
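    /*
     * NOTE: the classifier branch seeds DSErrAllocate() with +NOut while
     * the regression branch uses -NOut; the sign encodes whether DesiredY
     * holds a class label or a dense target vector.
     */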
25160 :
25161 : /*
25162 : * Processing
25163 : */
25164 0 : hpcpreparechunkedgradient(&network->weights, wcount, mlpntotal(network, _state), nin, nout, pbuf, _state);
25165 0 : cstart = subset0;
25166 0 : while(cstart<subset1)
25167 : {
25168 :
25169 : /*
25170 : * Determine size of current chunk and copy it to PBuf.XY
25171 : */
25172 0 : csize = ae_minint(subset1, cstart+pbuf->chunksize, _state)-cstart;
25173 0 : for(j=0; j<=csize-1; j++)
25174 : {
25175 0 : srcidx = -1;
25176 0 : if( subsettype==0 )
25177 : {
25178 0 : srcidx = cstart+j;
25179 : }
25180 0 : if( subsettype==1 )
25181 : {
25182 0 : srcidx = idx->ptr.p_int[cstart+j];
25183 : }
25184 0 : ae_assert(srcidx>=0, "MLPAllErrorsX: internal error", _state);
25185 0 : if( datasettype==0 )
25186 : {
25187 0 : ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,rowsize-1));
25188 : }
25189 0 : if( datasettype==1 )
25190 : {
25191 0 : sparsegetrow(sparsexy, srcidx, &pbuf->xyrow, _state);
25192 0 : ae_v_move(&pbuf->xy.ptr.pp_double[j][0], 1, &pbuf->xyrow.ptr.p_double[0], 1, ae_v_len(0,rowsize-1));
25193 : }
25194 : }
25195 :
25196 : /*
25197 : * Unpack XY and process (temporary code, to be replaced by chunked processing)
25198 : */
25199 0 : for(j=0; j<=csize-1; j++)
25200 : {
25201 0 : ae_v_move(&pbuf->xy2.ptr.pp_double[j][0], 1, &pbuf->xy.ptr.pp_double[j][0], 1, ae_v_len(0,rowsize-1));
25202 : }
25203 0 : mlpbase_mlpchunkedprocess(network, &pbuf->xy2, 0, csize, &pbuf->batch4buf, &pbuf->hpcbuf, _state);
25204 0 : for(j=0; j<=csize-1; j++)
25205 : {
25206 0 : ae_v_move(&pbuf->x.ptr.p_double[0], 1, &pbuf->xy2.ptr.pp_double[j][0], 1, ae_v_len(0,nin-1));
25207 0 : ae_v_move(&pbuf->y.ptr.p_double[0], 1, &pbuf->xy2.ptr.pp_double[j][nin], 1, ae_v_len(0,nout-1));
25208 0 : if( iscls )
25209 : {
25210 0 : pbuf->desiredy.ptr.p_double[0] = pbuf->xy.ptr.pp_double[j][nin];
25211 : }
25212 : else
25213 : {
25214 0 : ae_v_move(&pbuf->desiredy.ptr.p_double[0], 1, &pbuf->xy.ptr.pp_double[j][nin], 1, ae_v_len(0,nout-1));
25215 : }
25216 0 : dserraccumulate(&pbuf->tmp0, &pbuf->y, &pbuf->desiredy, _state);
25217 : }
25218 :
25219 : /*
25220 : * Process chunk and advance line pointer
25221 : */
25222 0 : cstart = cstart+pbuf->chunksize;
25223 : }
25224 0 : dserrfinish(&pbuf->tmp0, _state);
25225 0 : rep->relclserror = pbuf->tmp0.ptr.p_double[0];
25226 0 : rep->avgce = pbuf->tmp0.ptr.p_double[1]/ae_log((double)(2), _state);
25227 0 : rep->rmserror = pbuf->tmp0.ptr.p_double[2];
25228 0 : rep->avgerror = pbuf->tmp0.ptr.p_double[3];
25229 0 : rep->avgrelerror = pbuf->tmp0.ptr.p_double[4];
25230 :
25231 : /*
25232 : * Recycle
25233 : */
25234 0 : ae_shared_pool_recycle(buf, &_pbuf, _state);
25235 0 : ae_frame_leave(_state);
25236 : }
25237 :
25238 :
25239 : /*************************************************************************
25240 : Serial stub for GPL edition.
25241 : *************************************************************************/
25242 0 : ae_bool _trypexec_mlpallerrorsx(multilayerperceptron* network,
25243 : /* Real */ ae_matrix* densexy,
25244 : sparsematrix* sparsexy,
25245 : ae_int_t datasetsize,
25246 : ae_int_t datasettype,
25247 : /* Integer */ ae_vector* idx,
25248 : ae_int_t subset0,
25249 : ae_int_t subset1,
25250 : ae_int_t subsettype,
25251 : ae_shared_pool* buf,
25252 : modelerrors* rep,
25253 : ae_state *_state)
25254 : {
25255 0 : return ae_false;
25256 : }
25257 :
25258 :
25259 : /*************************************************************************
25260 : Internal subroutine: adding new input layer to network
25261 : *************************************************************************/
25262 0 : static void mlpbase_addinputlayer(ae_int_t ncount,
25263 : /* Integer */ ae_vector* lsizes,
25264 : /* Integer */ ae_vector* ltypes,
25265 : /* Integer */ ae_vector* lconnfirst,
25266 : /* Integer */ ae_vector* lconnlast,
25267 : ae_int_t* lastproc,
25268 : ae_state *_state)
25269 : {
25270 :
25271 :
25272 0 : lsizes->ptr.p_int[0] = ncount;
25273 0 : ltypes->ptr.p_int[0] = -2;
25274 0 : lconnfirst->ptr.p_int[0] = 0;
25275 0 : lconnlast->ptr.p_int[0] = 0;
25276 0 : *lastproc = 0;
25277 0 : }
25278 :
25279 :
25280 : /*************************************************************************
25281 : Internal subroutine: adding new summator layer to network
25282 : *************************************************************************/
25283 0 : static void mlpbase_addbiasedsummatorlayer(ae_int_t ncount,
25284 : /* Integer */ ae_vector* lsizes,
25285 : /* Integer */ ae_vector* ltypes,
25286 : /* Integer */ ae_vector* lconnfirst,
25287 : /* Integer */ ae_vector* lconnlast,
25288 : ae_int_t* lastproc,
25289 : ae_state *_state)
25290 : {
25291 :
25292 :
25293 0 : lsizes->ptr.p_int[*lastproc+1] = 1;
25294 0 : ltypes->ptr.p_int[*lastproc+1] = -3;
25295 0 : lconnfirst->ptr.p_int[*lastproc+1] = 0;
25296 0 : lconnlast->ptr.p_int[*lastproc+1] = 0;
25297 0 : lsizes->ptr.p_int[*lastproc+2] = ncount;
25298 0 : ltypes->ptr.p_int[*lastproc+2] = 0;
25299 0 : lconnfirst->ptr.p_int[*lastproc+2] = *lastproc;
25300 0 : lconnlast->ptr.p_int[*lastproc+2] = *lastproc+1;
25301 0 : *lastproc = *lastproc+2;
25302 0 : }
25303 :
25304 :
25305 : /*************************************************************************
25306 : Internal subroutine: adding new activation layer to network
25307 : *************************************************************************/
25308 0 : static void mlpbase_addactivationlayer(ae_int_t functype,
25309 : /* Integer */ ae_vector* lsizes,
25310 : /* Integer */ ae_vector* ltypes,
25311 : /* Integer */ ae_vector* lconnfirst,
25312 : /* Integer */ ae_vector* lconnlast,
25313 : ae_int_t* lastproc,
25314 : ae_state *_state)
25315 : {
25316 :
25317 :
25318 0 : ae_assert(functype>0||functype==-5, "AddActivationLayer: incorrect function type", _state);
25319 0 : lsizes->ptr.p_int[*lastproc+1] = lsizes->ptr.p_int[*lastproc];
25320 0 : ltypes->ptr.p_int[*lastproc+1] = functype;
25321 0 : lconnfirst->ptr.p_int[*lastproc+1] = *lastproc;
25322 0 : lconnlast->ptr.p_int[*lastproc+1] = *lastproc;
25323 0 : *lastproc = *lastproc+1;
25324 0 : }
25325 :
25326 :
25327 : /*************************************************************************
25328 : Internal subroutine: adding new zero layer to network
25329 : *************************************************************************/
25330 0 : static void mlpbase_addzerolayer(/* Integer */ ae_vector* lsizes,
25331 : /* Integer */ ae_vector* ltypes,
25332 : /* Integer */ ae_vector* lconnfirst,
25333 : /* Integer */ ae_vector* lconnlast,
25334 : ae_int_t* lastproc,
25335 : ae_state *_state)
25336 : {
25337 :
25338 :
25339 0 : lsizes->ptr.p_int[*lastproc+1] = 1;
25340 0 : ltypes->ptr.p_int[*lastproc+1] = -4;
25341 0 : lconnfirst->ptr.p_int[*lastproc+1] = 0;
25342 0 : lconnlast->ptr.p_int[*lastproc+1] = 0;
25343 0 : *lastproc = *lastproc+1;
25344 0 : }
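
/*
 * How these helpers compose (a sketch only; the actual MLPCreateX()
 * constructors live in the interface part of this unit): a regression
 * network with one hidden layer is assembled roughly as
 *
 *     mlpbase_addinputlayer(nin, &lsizes, &ltypes, &lconnfirst, &lconnlast, &lastproc, _state);
 *     mlpbase_addbiasedsummatorlayer(nhid, ...);   // "-1" bias neuron + adaptive summators
 *     mlpbase_addactivationlayer(1, ...);          // nonlinear activation
 *     mlpbase_addbiasedsummatorlayer(nout, ...);
 *     mlpbase_addactivationlayer(-5, ...);         // -5 denotes the linear activation here
 *     mlpbase_mlpcreate(nin, nout, &lsizes, &ltypes, &lconnfirst, &lconnlast,
 *         layerscount, ae_false, network, _state); // ae_true for classifier networks
 */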
25345 :
25346 :
25347 : /*************************************************************************
25348 : This routine adds input layer to the high-level description of the network.
25349 :
25350 : It modifies Network.HLConnections and Network.HLNeurons and assumes that
25351 : these arrays have enough space to store the data. It accepts the following
25352 : parameters:
25353 : Network - network
25354 : ConnIdx - index of the first free entry in the HLConnections
25355 : NeuroIdx - index of the first free entry in the HLNeurons
25356 : StructInfoIdx- index of the first entry in the low level description
25357 : of the current layer (in the StructInfo array)
25358 : NIn - number of inputs
25359 :
25360 : It modifies Network and the indices.
25361 : *************************************************************************/
25362 0 : static void mlpbase_hladdinputlayer(multilayerperceptron* network,
25363 : ae_int_t* connidx,
25364 : ae_int_t* neuroidx,
25365 : ae_int_t* structinfoidx,
25366 : ae_int_t nin,
25367 : ae_state *_state)
25368 : {
25369 : ae_int_t i;
25370 : ae_int_t offs;
25371 :
25372 :
25373 0 : offs = mlpbase_hlnfieldwidth*(*neuroidx);
25374 0 : for(i=0; i<=nin-1; i++)
25375 : {
25376 0 : network->hlneurons.ptr.p_int[offs+0] = 0;
25377 0 : network->hlneurons.ptr.p_int[offs+1] = i;
25378 0 : network->hlneurons.ptr.p_int[offs+2] = -1;
25379 0 : network->hlneurons.ptr.p_int[offs+3] = -1;
25380 0 : offs = offs+mlpbase_hlnfieldwidth;
25381 : }
25382 0 : *neuroidx = *neuroidx+nin;
25383 0 : *structinfoidx = *structinfoidx+nin;
25384 0 : }
25385 :
25386 :
25387 : /*************************************************************************
25388 : This routine adds output layer to the high-level description of
25389 : the network.
25390 :
25391 : It modifies Network.HLConnections and Network.HLNeurons and assumes that
25392 : these arrays have enough space to store the data. It accepts the following
25393 : parameters:
25394 : Network - network
25395 : ConnIdx - index of the first free entry in the HLConnections
25396 : NeuroIdx - index of the first free entry in the HLNeurons
25397 : StructInfoIdx- index of the first entry in the low level description
25398 : of the current layer (in the StructInfo array)
25399 : WeightsIdx - index of the first entry in the Weights array which
25400 : corresponds to the current layer
25401 : K - current layer index
25402 : NPrev - number of neurons in the previous layer
25403 : NOut - number of outputs
25404 : IsCls - is it a classifier network?
25405 : IsLinear - is it a network with linear output?
25406 :
25407 : It modifies Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx.
25408 : *************************************************************************/
25409 0 : static void mlpbase_hladdoutputlayer(multilayerperceptron* network,
25410 : ae_int_t* connidx,
25411 : ae_int_t* neuroidx,
25412 : ae_int_t* structinfoidx,
25413 : ae_int_t* weightsidx,
25414 : ae_int_t k,
25415 : ae_int_t nprev,
25416 : ae_int_t nout,
25417 : ae_bool iscls,
25418 : ae_bool islinearout,
25419 : ae_state *_state)
25420 : {
25421 : ae_int_t i;
25422 : ae_int_t j;
25423 : ae_int_t neurooffs;
25424 : ae_int_t connoffs;
25425 :
25426 :
25427 0 : ae_assert((iscls&&islinearout)||!iscls, "HLAddOutputLayer: internal error", _state);
25428 0 : neurooffs = mlpbase_hlnfieldwidth*(*neuroidx);
25429 0 : connoffs = mlpbase_hlconnfieldwidth*(*connidx);
25430 0 : if( !iscls )
25431 : {
25432 :
25433 : /*
25434 : * Regression network
25435 : */
25436 0 : for(i=0; i<=nout-1; i++)
25437 : {
25438 0 : network->hlneurons.ptr.p_int[neurooffs+0] = k;
25439 0 : network->hlneurons.ptr.p_int[neurooffs+1] = i;
25440 0 : network->hlneurons.ptr.p_int[neurooffs+2] = *structinfoidx+1+nout+i;
25441 0 : network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
25442 0 : neurooffs = neurooffs+mlpbase_hlnfieldwidth;
25443 : }
25444 0 : for(i=0; i<=nprev-1; i++)
25445 : {
25446 0 : for(j=0; j<=nout-1; j++)
25447 : {
25448 0 : network->hlconnections.ptr.p_int[connoffs+0] = k-1;
25449 0 : network->hlconnections.ptr.p_int[connoffs+1] = i;
25450 0 : network->hlconnections.ptr.p_int[connoffs+2] = k;
25451 0 : network->hlconnections.ptr.p_int[connoffs+3] = j;
25452 0 : network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
25453 0 : connoffs = connoffs+mlpbase_hlconnfieldwidth;
25454 : }
25455 : }
25456 0 : *connidx = *connidx+nprev*nout;
25457 0 : *neuroidx = *neuroidx+nout;
25458 0 : *structinfoidx = *structinfoidx+2*nout+1;
25459 0 : *weightsidx = *weightsidx+nout*(nprev+1);
25460 : }
25461 : else
25462 : {
25463 :
25464 : /*
25465 : * Classification network
25466 : */
25467 0 : for(i=0; i<=nout-2; i++)
25468 : {
25469 0 : network->hlneurons.ptr.p_int[neurooffs+0] = k;
25470 0 : network->hlneurons.ptr.p_int[neurooffs+1] = i;
25471 0 : network->hlneurons.ptr.p_int[neurooffs+2] = -1;
25472 0 : network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
25473 0 : neurooffs = neurooffs+mlpbase_hlnfieldwidth;
25474 : }
25475 0 : network->hlneurons.ptr.p_int[neurooffs+0] = k;
25476 0 : network->hlneurons.ptr.p_int[neurooffs+1] = i;
25477 0 : network->hlneurons.ptr.p_int[neurooffs+2] = -1;
25478 0 : network->hlneurons.ptr.p_int[neurooffs+3] = -1;
25479 0 : for(i=0; i<=nprev-1; i++)
25480 : {
25481 0 : for(j=0; j<=nout-2; j++)
25482 : {
25483 0 : network->hlconnections.ptr.p_int[connoffs+0] = k-1;
25484 0 : network->hlconnections.ptr.p_int[connoffs+1] = i;
25485 0 : network->hlconnections.ptr.p_int[connoffs+2] = k;
25486 0 : network->hlconnections.ptr.p_int[connoffs+3] = j;
25487 0 : network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
25488 0 : connoffs = connoffs+mlpbase_hlconnfieldwidth;
25489 : }
25490 : }
25491 0 : *connidx = *connidx+nprev*(nout-1);
25492 0 : *neuroidx = *neuroidx+nout;
25493 0 : *structinfoidx = *structinfoidx+nout+2;
25494 0 : *weightsidx = *weightsidx+(nout-1)*(nprev+1);
25495 : }
25496 0 : }
25497 :
25498 :
25499 : /*************************************************************************
25500 : This routine adds hidden layer to the high-level description of
25501 : the network.
25502 :
25503 : It modifies Network.HLConnections and Network.HLNeurons and assumes that
25504 : these arrays have enough space to store the data. It accepts the following
25505 : parameters:
25506 : Network - network
25507 : ConnIdx - index of the first free entry in the HLConnections
25508 : NeuroIdx - index of the first free entry in the HLNeurons
25509 : StructInfoIdx- index of the first entry in the low level description
25510 : of the current layer (in the StructInfo array)
25511 : WeightsIdx - index of the first entry in the Weights array which
25512 : corresponds to the current layer
25513 : K - current layer index
25514 : NPrev - number of neurons in the previous layer
25515 : NCur - number of neurons in the current layer
25516 :
25517 : It modifies Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx.
25518 : *************************************************************************/
25519 0 : static void mlpbase_hladdhiddenlayer(multilayerperceptron* network,
25520 : ae_int_t* connidx,
25521 : ae_int_t* neuroidx,
25522 : ae_int_t* structinfoidx,
25523 : ae_int_t* weightsidx,
25524 : ae_int_t k,
25525 : ae_int_t nprev,
25526 : ae_int_t ncur,
25527 : ae_state *_state)
25528 : {
25529 : ae_int_t i;
25530 : ae_int_t j;
25531 : ae_int_t neurooffs;
25532 : ae_int_t connoffs;
25533 :
25534 :
25535 0 : neurooffs = mlpbase_hlnfieldwidth*(*neuroidx);
25536 0 : connoffs = mlpbase_hlconnfieldwidth*(*connidx);
25537 0 : for(i=0; i<=ncur-1; i++)
25538 : {
25539 0 : network->hlneurons.ptr.p_int[neurooffs+0] = k;
25540 0 : network->hlneurons.ptr.p_int[neurooffs+1] = i;
25541 0 : network->hlneurons.ptr.p_int[neurooffs+2] = *structinfoidx+1+ncur+i;
25542 0 : network->hlneurons.ptr.p_int[neurooffs+3] = *weightsidx+nprev+(nprev+1)*i;
25543 0 : neurooffs = neurooffs+mlpbase_hlnfieldwidth;
25544 : }
25545 0 : for(i=0; i<=nprev-1; i++)
25546 : {
25547 0 : for(j=0; j<=ncur-1; j++)
25548 : {
25549 0 : network->hlconnections.ptr.p_int[connoffs+0] = k-1;
25550 0 : network->hlconnections.ptr.p_int[connoffs+1] = i;
25551 0 : network->hlconnections.ptr.p_int[connoffs+2] = k;
25552 0 : network->hlconnections.ptr.p_int[connoffs+3] = j;
25553 0 : network->hlconnections.ptr.p_int[connoffs+4] = *weightsidx+i+j*(nprev+1);
25554 0 : connoffs = connoffs+mlpbase_hlconnfieldwidth;
25555 : }
25556 : }
25557 0 : *connidx = *connidx+nprev*ncur;
25558 0 : *neuroidx = *neuroidx+ncur;
25559 0 : *structinfoidx = *structinfoidx+2*ncur+1;
25560 0 : *weightsidx = *weightsidx+ncur*(nprev+1);
25561 0 : }
25562 :
25563 :
25564 : /*************************************************************************
25565 : This function fills high-level information about a network created using
25566 : the internal MLPCreate() function.
25567 :
25568 : This function does NOT examine StructInfo for low level information, it
25569 : just expects that the network has the following structure:
25570 :
25571 : input neuron \
25572 : ... | input layer
25573 : input neuron /
25574 :
25575 : "-1" neuron \
25576 : biased summator |
25577 : ... |
25578 : biased summator | hidden layer(s), if any exist
25579 : activation function |
25580 : ... |
25581 : activation function /
25582 :
25583 : "-1" neuron \
25584 : biased summator | output layer:
25585 : ... |
25586 : biased summator | * we have NOut summators/activators for regression networks
25587 : activation function | * we have only NOut-1 summators and no activators for classifiers
25588 : ... | * we have the "0" neuron only for classifiers
25589 : activation function |
25590 : "0" neuron /
25591 :
25592 :
25593 : -- ALGLIB --
25594 : Copyright 30.03.2008 by Bochkanov Sergey
25595 : *************************************************************************/
25596 0 : static void mlpbase_fillhighlevelinformation(multilayerperceptron* network,
25597 : ae_int_t nin,
25598 : ae_int_t nhid1,
25599 : ae_int_t nhid2,
25600 : ae_int_t nout,
25601 : ae_bool iscls,
25602 : ae_bool islinearout,
25603 : ae_state *_state)
25604 : {
25605 : ae_int_t idxweights;
25606 : ae_int_t idxstruct;
25607 : ae_int_t idxneuro;
25608 : ae_int_t idxconn;
25609 :
25610 :
25611 0 : ae_assert((iscls&&islinearout)||!iscls, "FillHighLevelInformation: internal error", _state);
25612 :
25613 : /*
25614 : * Preparations common to all types of networks
25615 : */
25616 0 : idxweights = 0;
25617 0 : idxneuro = 0;
25618 0 : idxstruct = 0;
25619 0 : idxconn = 0;
25620 0 : network->hlnetworktype = 0;
25621 :
25622 : /*
25623 : * network without hidden layers
25624 : */
25625 0 : if( nhid1==0 )
25626 : {
25627 0 : ae_vector_set_length(&network->hllayersizes, 2, _state);
25628 0 : network->hllayersizes.ptr.p_int[0] = nin;
25629 0 : network->hllayersizes.ptr.p_int[1] = nout;
25630 0 : if( !iscls )
25631 : {
25632 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*nin*nout, _state);
25633 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nout), _state);
25634 0 : network->hlnormtype = 0;
25635 : }
25636 : else
25637 : {
25638 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*nin*(nout-1), _state);
25639 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nout), _state);
25640 0 : network->hlnormtype = 1;
25641 : }
25642 0 : mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
25643 0 : mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nout, iscls, islinearout, _state);
25644 0 : return;
25645 : }
25646 :
25647 : /*
25648 : * network with one hidden layers
25649 : */
25650 0 : if( nhid2==0 )
25651 : {
25652 0 : ae_vector_set_length(&network->hllayersizes, 3, _state);
25653 0 : network->hllayersizes.ptr.p_int[0] = nin;
25654 0 : network->hllayersizes.ptr.p_int[1] = nhid1;
25655 0 : network->hllayersizes.ptr.p_int[2] = nout;
25656 0 : if( !iscls )
25657 : {
25658 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nout), _state);
25659 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nout), _state);
25660 0 : network->hlnormtype = 0;
25661 : }
25662 : else
25663 : {
25664 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*(nout-1)), _state);
25665 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nout), _state);
25666 0 : network->hlnormtype = 1;
25667 : }
25668 0 : mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
25669 0 : mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nhid1, _state);
25670 0 : mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 2, nhid1, nout, iscls, islinearout, _state);
25671 0 : return;
25672 : }
25673 :
25674 : /*
25675 : * Two hidden layers
25676 : */
25677 0 : ae_vector_set_length(&network->hllayersizes, 4, _state);
25678 0 : network->hllayersizes.ptr.p_int[0] = nin;
25679 0 : network->hllayersizes.ptr.p_int[1] = nhid1;
25680 0 : network->hllayersizes.ptr.p_int[2] = nhid2;
25681 0 : network->hllayersizes.ptr.p_int[3] = nout;
25682 0 : if( !iscls )
25683 : {
25684 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*nout), _state);
25685 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nhid2+nout), _state);
25686 0 : network->hlnormtype = 0;
25687 : }
25688 : else
25689 : {
25690 0 : ae_vector_set_length(&network->hlconnections, mlpbase_hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*(nout-1)), _state);
25691 0 : ae_vector_set_length(&network->hlneurons, mlpbase_hlnfieldwidth*(nin+nhid1+nhid2+nout), _state);
25692 0 : network->hlnormtype = 1;
25693 : }
25694 0 : mlpbase_hladdinputlayer(network, &idxconn, &idxneuro, &idxstruct, nin, _state);
25695 0 : mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 1, nin, nhid1, _state);
25696 0 : mlpbase_hladdhiddenlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 2, nhid1, nhid2, _state);
25697 0 : mlpbase_hladdoutputlayer(network, &idxconn, &idxneuro, &idxstruct, &idxweights, 3, nhid2, nout, iscls, islinearout, _state);
25698 : }
25699 :
25700 :
25701 : /*************************************************************************
25702 : Internal subroutine.
25703 :
25704 : -- ALGLIB --
25705 : Copyright 04.11.2007 by Bochkanov Sergey
25706 : *************************************************************************/
25707 0 : static void mlpbase_mlpcreate(ae_int_t nin,
25708 : ae_int_t nout,
25709 : /* Integer */ ae_vector* lsizes,
25710 : /* Integer */ ae_vector* ltypes,
25711 : /* Integer */ ae_vector* lconnfirst,
25712 : /* Integer */ ae_vector* lconnlast,
25713 : ae_int_t layerscount,
25714 : ae_bool isclsnet,
25715 : multilayerperceptron* network,
25716 : ae_state *_state)
25717 : {
25718 : ae_frame _frame_block;
25719 : ae_int_t i;
25720 : ae_int_t j;
25721 : ae_int_t ssize;
25722 : ae_int_t ntotal;
25723 : ae_int_t wcount;
25724 : ae_int_t offs;
25725 : ae_int_t nprocessed;
25726 : ae_int_t wallocated;
25727 : ae_vector localtemp;
25728 : ae_vector lnfirst;
25729 : ae_vector lnsyn;
25730 : mlpbuffers buf;
25731 : smlpgrad sgrad;
25732 :
25733 0 : ae_frame_make(_state, &_frame_block);
25734 0 : memset(&localtemp, 0, sizeof(localtemp));
25735 0 : memset(&lnfirst, 0, sizeof(lnfirst));
25736 0 : memset(&lnsyn, 0, sizeof(lnsyn));
25737 0 : memset(&buf, 0, sizeof(buf));
25738 0 : memset(&sgrad, 0, sizeof(sgrad));
25739 0 : _multilayerperceptron_clear(network);
25740 0 : ae_vector_init(&localtemp, 0, DT_INT, _state, ae_true);
25741 0 : ae_vector_init(&lnfirst, 0, DT_INT, _state, ae_true);
25742 0 : ae_vector_init(&lnsyn, 0, DT_INT, _state, ae_true);
25743 0 : _mlpbuffers_init(&buf, _state, ae_true);
25744 0 : _smlpgrad_init(&sgrad, _state, ae_true);
25745 :
25746 :
25747 : /*
25748 : * Check
25749 : */
25750 0 : ae_assert(layerscount>0, "MLPCreate: wrong parameters!", _state);
25751 0 : ae_assert(ltypes->ptr.p_int[0]==-2, "MLPCreate: wrong LTypes[0] (must be -2)!", _state);
25752 0 : for(i=0; i<=layerscount-1; i++)
25753 : {
25754 0 : ae_assert(lsizes->ptr.p_int[i]>0, "MLPCreate: wrong LSizes!", _state);
25755 0 : ae_assert(lconnfirst->ptr.p_int[i]>=0&&(lconnfirst->ptr.p_int[i]<i||i==0), "MLPCreate: wrong LConnFirst!", _state);
25756 0 : ae_assert(lconnlast->ptr.p_int[i]>=lconnfirst->ptr.p_int[i]&&(lconnlast->ptr.p_int[i]<i||i==0), "MLPCreate: wrong LConnLast!", _state);
25757 : }
25758 :
25759 : /*
25760 : * Build network geometry
25761 : */
25762 0 : ae_vector_set_length(&lnfirst, layerscount-1+1, _state);
25763 0 : ae_vector_set_length(&lnsyn, layerscount-1+1, _state);
25764 0 : ntotal = 0;
25765 0 : wcount = 0;
25766 0 : for(i=0; i<=layerscount-1; i++)
25767 : {
25768 :
25769 : /*
25770 : * Analyze connections.
25771 : * This code must throw an assertion in case of unknown LTypes[I]
25772 : */
25773 0 : lnsyn.ptr.p_int[i] = -1;
25774 0 : if( ltypes->ptr.p_int[i]>=0||ltypes->ptr.p_int[i]==-5 )
25775 : {
25776 0 : lnsyn.ptr.p_int[i] = 0;
25777 0 : for(j=lconnfirst->ptr.p_int[i]; j<=lconnlast->ptr.p_int[i]; j++)
25778 : {
25779 0 : lnsyn.ptr.p_int[i] = lnsyn.ptr.p_int[i]+lsizes->ptr.p_int[j];
25780 : }
25781 : }
25782 : else
25783 : {
25784 0 : if( (ltypes->ptr.p_int[i]==-2||ltypes->ptr.p_int[i]==-3)||ltypes->ptr.p_int[i]==-4 )
25785 : {
25786 0 : lnsyn.ptr.p_int[i] = 0;
25787 : }
25788 : }
25789 0 : ae_assert(lnsyn.ptr.p_int[i]>=0, "MLPCreate: internal error #0!", _state);
25790 :
25791 : /*
25792 : * Other info
25793 : */
25794 0 : lnfirst.ptr.p_int[i] = ntotal;
25795 0 : ntotal = ntotal+lsizes->ptr.p_int[i];
25796 0 : if( ltypes->ptr.p_int[i]==0 )
25797 : {
25798 0 : wcount = wcount+lnsyn.ptr.p_int[i]*lsizes->ptr.p_int[i];
25799 : }
25800 : }
25801 0 : ssize = 7+ntotal*mlpbase_nfieldwidth;
25802 :
25803 : /*
25804 : * Allocate
25805 : */
25806 0 : ae_vector_set_length(&network->structinfo, ssize-1+1, _state);
25807 0 : ae_vector_set_length(&network->weights, wcount-1+1, _state);
25808 0 : if( isclsnet )
25809 : {
25810 0 : ae_vector_set_length(&network->columnmeans, nin-1+1, _state);
25811 0 : ae_vector_set_length(&network->columnsigmas, nin-1+1, _state);
25812 : }
25813 : else
25814 : {
25815 0 : ae_vector_set_length(&network->columnmeans, nin+nout-1+1, _state);
25816 0 : ae_vector_set_length(&network->columnsigmas, nin+nout-1+1, _state);
25817 : }
25818 0 : ae_vector_set_length(&network->neurons, ntotal-1+1, _state);
25819 0 : ae_vector_set_length(&network->nwbuf, ae_maxint(wcount, 2*nout, _state)-1+1, _state);
25820 0 : ae_vector_set_length(&network->integerbuf, 3+1, _state);
25821 0 : ae_vector_set_length(&network->dfdnet, ntotal-1+1, _state);
25822 0 : ae_vector_set_length(&network->x, nin-1+1, _state);
25823 0 : ae_vector_set_length(&network->y, nout-1+1, _state);
25824 0 : ae_vector_set_length(&network->derror, ntotal-1+1, _state);
25825 :
25826 : /*
25827 : * Fill structure:
25828 : * * first, fill with dummy values to avoid spurious reports by Valgrind
25829 : * * then fill global info header
25830 : */
25831 0 : for(i=0; i<=ssize-1; i++)
25832 : {
25833 0 : network->structinfo.ptr.p_int[i] = -999999;
25834 : }
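    /*
     * Global header layout (filled below):
     *   [0] StructInfo size  [1] NIn  [2] NOut  [3] NTotal  [4] WCount
     *   [5] offset of the first neuron record (=7)
     *   [6] normalization type: 1 for softmax classifiers, 0 otherwise
     */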
25835 0 : network->structinfo.ptr.p_int[0] = ssize;
25836 0 : network->structinfo.ptr.p_int[1] = nin;
25837 0 : network->structinfo.ptr.p_int[2] = nout;
25838 0 : network->structinfo.ptr.p_int[3] = ntotal;
25839 0 : network->structinfo.ptr.p_int[4] = wcount;
25840 0 : network->structinfo.ptr.p_int[5] = 7;
25841 0 : if( isclsnet )
25842 : {
25843 0 : network->structinfo.ptr.p_int[6] = 1;
25844 : }
25845 : else
25846 : {
25847 0 : network->structinfo.ptr.p_int[6] = 0;
25848 : }
25849 :
25850 : /*
25851 : * Fill structure: neuron connections
25852 : */
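    /*
     * Each neuron record occupies NFieldWidth consecutive entries:
     *   [offs+0] neuron type (LTypes[i])
     *   [offs+1] number of incoming connections
     *   [offs+2] index of the first connected neuron
     *   [offs+3] offset of the first weight, or -1 for weightless neurons
     */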
25853 0 : nprocessed = 0;
25854 0 : wallocated = 0;
25855 0 : for(i=0; i<=layerscount-1; i++)
25856 : {
25857 0 : for(j=0; j<=lsizes->ptr.p_int[i]-1; j++)
25858 : {
25859 0 : offs = network->structinfo.ptr.p_int[5]+nprocessed*mlpbase_nfieldwidth;
25860 0 : network->structinfo.ptr.p_int[offs+0] = ltypes->ptr.p_int[i];
25861 0 : if( ltypes->ptr.p_int[i]==0 )
25862 : {
25863 :
25864 : /*
25865 : * Adaptive summator:
25866 : * * connections with weights to previous neurons
25867 : */
25868 0 : network->structinfo.ptr.p_int[offs+1] = lnsyn.ptr.p_int[i];
25869 0 : network->structinfo.ptr.p_int[offs+2] = lnfirst.ptr.p_int[lconnfirst->ptr.p_int[i]];
25870 0 : network->structinfo.ptr.p_int[offs+3] = wallocated;
25871 0 : wallocated = wallocated+lnsyn.ptr.p_int[i];
25872 0 : nprocessed = nprocessed+1;
25873 : }
25874 0 : if( ltypes->ptr.p_int[i]>0||ltypes->ptr.p_int[i]==-5 )
25875 : {
25876 :
25877 : /*
25878 : * Activation layer:
25879 : * * each neuron is connected to exactly one of the previous neurons.
25880 : * * no weights
25881 : */
25882 0 : network->structinfo.ptr.p_int[offs+1] = 1;
25883 0 : network->structinfo.ptr.p_int[offs+2] = lnfirst.ptr.p_int[lconnfirst->ptr.p_int[i]]+j;
25884 0 : network->structinfo.ptr.p_int[offs+3] = -1;
25885 0 : nprocessed = nprocessed+1;
25886 : }
25887 0 : if( (ltypes->ptr.p_int[i]==-2||ltypes->ptr.p_int[i]==-3)||ltypes->ptr.p_int[i]==-4 )
25888 : {
25889 0 : nprocessed = nprocessed+1;
25890 : }
25891 : }
25892 : }
25893 0 : ae_assert(wallocated==wcount, "MLPCreate: internal error #1!", _state);
25894 0 : ae_assert(nprocessed==ntotal, "MLPCreate: internal error #2!", _state);
25895 :
25896 : /*
25897 : * Fill weights with small random values
25898 : * Initialize means and sigmas
25899 : */
25900 0 : for(i=0; i<=nin-1; i++)
25901 : {
25902 0 : network->columnmeans.ptr.p_double[i] = (double)(0);
25903 0 : network->columnsigmas.ptr.p_double[i] = (double)(1);
25904 : }
25905 0 : if( !isclsnet )
25906 : {
25907 0 : for(i=0; i<=nout-1; i++)
25908 : {
25909 0 : network->columnmeans.ptr.p_double[nin+i] = (double)(0);
25910 0 : network->columnsigmas.ptr.p_double[nin+i] = (double)(1);
25911 : }
25912 : }
25913 0 : mlprandomize(network, _state);
25914 :
25915 : /*
25916 : * Seed buffers
25917 : */
25918 0 : ae_shared_pool_set_seed(&network->buf, &buf, sizeof(buf), _mlpbuffers_init, _mlpbuffers_init_copy, _mlpbuffers_destroy, _state);
25919 0 : ae_vector_set_length(&sgrad.g, wcount, _state);
25920 0 : sgrad.f = 0.0;
25921 0 : for(i=0; i<=wcount-1; i++)
25922 : {
25923 0 : sgrad.g.ptr.p_double[i] = 0.0;
25924 : }
25925 0 : ae_shared_pool_set_seed(&network->gradbuf, &sgrad, sizeof(sgrad), _smlpgrad_init, _smlpgrad_init_copy, _smlpgrad_destroy, _state);
25926 0 : ae_frame_leave(_state);
25927 0 : }
25928 :
25929 :
25930 : /*************************************************************************
25931 : Internal subroutine for Hessian calculation.
25932 :
25933 : WARNING! Unspeakable math far beyond human capabilities :)
25934 : *************************************************************************/
25935 0 : static void mlpbase_mlphessianbatchinternal(multilayerperceptron* network,
25936 : /* Real */ ae_matrix* xy,
25937 : ae_int_t ssize,
25938 : ae_bool naturalerr,
25939 : double* e,
25940 : /* Real */ ae_vector* grad,
25941 : /* Real */ ae_matrix* h,
25942 : ae_state *_state)
25943 : {
25944 : ae_frame _frame_block;
25945 : ae_int_t nin;
25946 : ae_int_t nout;
25947 : ae_int_t wcount;
25948 : ae_int_t ntotal;
25949 : ae_int_t istart;
25950 : ae_int_t i;
25951 : ae_int_t j;
25952 : ae_int_t k;
25953 : ae_int_t kl;
25954 : ae_int_t offs;
25955 : ae_int_t n1;
25956 : ae_int_t n2;
25957 : ae_int_t w1;
25958 : ae_int_t w2;
25959 : double s;
25960 : double t;
25961 : double v;
25962 : double et;
25963 : ae_bool bflag;
25964 : double f;
25965 : double df;
25966 : double d2f;
25967 : double deidyj;
25968 : double mx;
25969 : double q;
25970 : double z;
25971 : double s2;
25972 : double expi;
25973 : double expj;
25974 : ae_vector x;
25975 : ae_vector desiredy;
25976 : ae_vector gt;
25977 : ae_vector zeros;
25978 : ae_matrix rx;
25979 : ae_matrix ry;
25980 : ae_matrix rdx;
25981 : ae_matrix rdy;
25982 :
25983 0 : ae_frame_make(_state, &_frame_block);
25984 0 : memset(&x, 0, sizeof(x));
25985 0 : memset(&desiredy, 0, sizeof(desiredy));
25986 0 : memset(>, 0, sizeof(gt));
25987 0 : memset(&zeros, 0, sizeof(zeros));
25988 0 : memset(&rx, 0, sizeof(rx));
25989 0 : memset(&ry, 0, sizeof(ry));
25990 0 : memset(&rdx, 0, sizeof(rdx));
25991 0 : memset(&rdy, 0, sizeof(rdy));
25992 0 : *e = 0;
25993 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
25994 0 : ae_vector_init(&desiredy, 0, DT_REAL, _state, ae_true);
25995 0 : ae_vector_init(>, 0, DT_REAL, _state, ae_true);
25996 0 : ae_vector_init(&zeros, 0, DT_REAL, _state, ae_true);
25997 0 : ae_matrix_init(&rx, 0, 0, DT_REAL, _state, ae_true);
25998 0 : ae_matrix_init(&ry, 0, 0, DT_REAL, _state, ae_true);
25999 0 : ae_matrix_init(&rdx, 0, 0, DT_REAL, _state, ae_true);
26000 0 : ae_matrix_init(&rdy, 0, 0, DT_REAL, _state, ae_true);
26001 :
26002 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
26003 0 : ntotal = network->structinfo.ptr.p_int[3];
26004 0 : istart = network->structinfo.ptr.p_int[5];
26005 :
26006 : /*
26007 : * Prepare
26008 : */
26009 0 : ae_vector_set_length(&x, nin-1+1, _state);
26010 0 : ae_vector_set_length(&desiredy, nout-1+1, _state);
26011 0 : ae_vector_set_length(&zeros, wcount-1+1, _state);
26012 0 : ae_vector_set_length(>, wcount-1+1, _state);
26013 0 : ae_matrix_set_length(&rx, ntotal+nout-1+1, wcount-1+1, _state);
26014 0 : ae_matrix_set_length(&ry, ntotal+nout-1+1, wcount-1+1, _state);
26015 0 : ae_matrix_set_length(&rdx, ntotal+nout-1+1, wcount-1+1, _state);
26016 0 : ae_matrix_set_length(&rdy, ntotal+nout-1+1, wcount-1+1, _state);
26017 0 : *e = (double)(0);
26018 0 : for(i=0; i<=wcount-1; i++)
26019 : {
26020 0 : zeros.ptr.p_double[i] = (double)(0);
26021 : }
26022 0 : ae_v_move(&grad->ptr.p_double[0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26023 0 : for(i=0; i<=wcount-1; i++)
26024 : {
26025 0 : ae_v_move(&h->ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26026 : }
26027 :
26028 : /*
26029 : * Process
26030 : */
26031 0 : for(k=0; k<=ssize-1; k++)
26032 : {
26033 :
26034 : /*
26035 : * Process vector with MLPGradN.
26036 : * Now Neurons, DFDNET and DError contain the results of the last run.
26037 : */
26038 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nin-1));
26039 0 : if( mlpissoftmax(network, _state) )
26040 : {
26041 :
26042 : /*
26043 : * class labels outputs
26044 : */
26045 0 : kl = ae_round(xy->ptr.pp_double[k][nin], _state);
26046 0 : for(i=0; i<=nout-1; i++)
26047 : {
26048 0 : if( i==kl )
26049 : {
26050 0 : desiredy.ptr.p_double[i] = (double)(1);
26051 : }
26052 : else
26053 : {
26054 0 : desiredy.ptr.p_double[i] = (double)(0);
26055 : }
26056 : }
26057 : }
26058 : else
26059 : {
26060 :
26061 : /*
26062 : * real outputs
26063 : */
26064 0 : ae_v_move(&desiredy.ptr.p_double[0], 1, &xy->ptr.pp_double[k][nin], 1, ae_v_len(0,nout-1));
26065 : }
26066 0 : if( naturalerr )
26067 : {
26068 0 : mlpgradn(network, &x, &desiredy, &et, >, _state);
26069 : }
26070 : else
26071 : {
26072 0 : mlpgrad(network, &x, &desiredy, &et, >, _state);
26073 : }
26074 :
26075 : /*
26076 : * grad, error
26077 : */
26078 0 : *e = *e+et;
26079 0 : ae_v_add(&grad->ptr.p_double[0], 1, >.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26080 :
26081 : /*
26082 : * Hessian.
26083 : * Forward pass of the R-algorithm
26084 : */
26085 0 : for(i=0; i<=ntotal-1; i++)
26086 : {
26087 0 : offs = istart+i*mlpbase_nfieldwidth;
26088 0 : ae_v_move(&rx.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26089 0 : ae_v_move(&ry.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26090 0 : if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
26091 : {
26092 :
26093 : /*
26094 : * Activation function
26095 : */
26096 0 : n1 = network->structinfo.ptr.p_int[offs+2];
26097 0 : ae_v_move(&rx.ptr.pp_double[i][0], 1, &ry.ptr.pp_double[n1][0], 1, ae_v_len(0,wcount-1));
26098 0 : v = network->dfdnet.ptr.p_double[i];
26099 0 : ae_v_moved(&ry.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
26100 0 : continue;
26101 : }
26102 0 : if( network->structinfo.ptr.p_int[offs+0]==0 )
26103 : {
26104 :
26105 : /*
26106 : * Adaptive summator
26107 : */
26108 0 : n1 = network->structinfo.ptr.p_int[offs+2];
26109 0 : n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
26110 0 : w1 = network->structinfo.ptr.p_int[offs+3];
26111 0 : w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
26112 0 : for(j=n1; j<=n2; j++)
26113 : {
26114 0 : v = network->weights.ptr.p_double[w1+j-n1];
26115 0 : ae_v_addd(&rx.ptr.pp_double[i][0], 1, &ry.ptr.pp_double[j][0], 1, ae_v_len(0,wcount-1), v);
26116 0 : rx.ptr.pp_double[i][w1+j-n1] = rx.ptr.pp_double[i][w1+j-n1]+network->neurons.ptr.p_double[j];
26117 : }
26118 0 : ae_v_move(&ry.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
26119 0 : continue;
26120 : }
26121 0 : if( network->structinfo.ptr.p_int[offs+0]<0 )
26122 : {
26123 0 : bflag = ae_true;
26124 0 : if( network->structinfo.ptr.p_int[offs+0]==-2 )
26125 : {
26126 :
26127 : /*
26128 : * input neuron, left unchanged
26129 : */
26130 0 : bflag = ae_false;
26131 : }
26132 0 : if( network->structinfo.ptr.p_int[offs+0]==-3 )
26133 : {
26134 :
26135 : /*
26136 : * "-1" neuron, left unchanged
26137 : */
26138 0 : bflag = ae_false;
26139 : }
26140 0 : if( network->structinfo.ptr.p_int[offs+0]==-4 )
26141 : {
26142 :
26143 : /*
26144 : * "0" neuron, left unchanged
26145 : */
26146 0 : bflag = ae_false;
26147 : }
26148 0 : ae_assert(!bflag, "MLPHessianNBatch: internal error - unknown neuron type!", _state);
26149 0 : continue;
26150 : }
26151 : }
26152 :
26153 : /*
26154 : * Hessian. Backward pass of the R-algorithm.
26155 : *
26156 : * Stage 1. Initialize RDY
26157 : */
26158 0 : for(i=0; i<=ntotal+nout-1; i++)
26159 : {
26160 0 : ae_v_move(&rdy.ptr.pp_double[i][0], 1, &zeros.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
26161 : }
26162 0 : if( network->structinfo.ptr.p_int[6]==0 )
26163 : {
26164 :
26165 : /*
26166 : * Standardisation.
26167 : *
26168 : * In the context of the Hessian calculation, standardisation
26169 : * is treated as an additional layer with a weightless
26170 : * activation function:
26171 : *
26172 : * F(NET) := Sigma*NET
26173 : *
26174 : * So we add one more layer to the forward pass, and
26175 : * make forward/backward passes through this layer.
26176 : */
26177 0 : for(i=0; i<=nout-1; i++)
26178 : {
26179 0 : n1 = ntotal-nout+i;
26180 0 : n2 = ntotal+i;
26181 :
26182 : /*
26183 : * Forward pass from N1 to N2
26184 : */
26185 0 : ae_v_move(&rx.ptr.pp_double[n2][0], 1, &ry.ptr.pp_double[n1][0], 1, ae_v_len(0,wcount-1));
26186 0 : v = network->columnsigmas.ptr.p_double[nin+i];
26187 0 : ae_v_moved(&ry.ptr.pp_double[n2][0], 1, &rx.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1), v);
26188 :
26189 : /*
26190 : * Initialization of RDY
26191 : */
26192 0 : ae_v_move(&rdy.ptr.pp_double[n2][0], 1, &ry.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1));
26193 :
26194 : /*
26195 : * Backward pass from N2 to N1:
26196 : * 1. Calculate R(dE/dX).
26197 : * 2. No R(dE/dWij) is needed since weight of activation neuron
26198 : * is fixed to 1. So we can update R(dE/dY) for
26199 : * the connected neuron (note that Vij=0, Wij=1)
26200 : */
26201 0 : df = network->columnsigmas.ptr.p_double[nin+i];
26202 0 : ae_v_moved(&rdx.ptr.pp_double[n2][0], 1, &rdy.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1), df);
26203 0 : ae_v_add(&rdy.ptr.pp_double[n1][0], 1, &rdx.ptr.pp_double[n2][0], 1, ae_v_len(0,wcount-1));
26204 : }
26205 : }
26206 : else
26207 : {
26208 :
26209 : /*
26210 : * Softmax.
26211 : *
26212 : * Initialize RDY using generalized expression for ei'(yi)
26213 : * (see expression (9) from p. 5 of "Fast Exact Multiplication by the Hessian").
26214 : *
26215 : * When working with a softmax network, the generalized
26216 : * expression for ei'(yi) is used because softmax
26217 : * normalization makes each ei depend on all y's.
26218 : */
26219 0 : if( naturalerr )
26220 : {
26221 :
26222 : /*
26223 : * softmax + cross-entropy.
26224 : * We have:
26225 : *
26226 : * S = sum(exp(yk)),
26227 : * ei = sum(trn)*exp(yi)/S-trn_i
26228 : *
26229 : * j=i: d(ei)/d(yj) = T*exp(yi)*(S-exp(yi))/S^2
26230 : * j<>i: d(ei)/d(yj) = -T*exp(yi)*exp(yj)/S^2
26231 : */
26232 0 : t = (double)(0);
26233 0 : for(i=0; i<=nout-1; i++)
26234 : {
26235 0 : t = t+desiredy.ptr.p_double[i];
26236 : }
26237 0 : mx = network->neurons.ptr.p_double[ntotal-nout];
26238 0 : for(i=0; i<=nout-1; i++)
26239 : {
26240 0 : mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
26241 : }
26242 0 : s = (double)(0);
26243 0 : for(i=0; i<=nout-1; i++)
26244 : {
26245 0 : network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
26246 0 : s = s+network->nwbuf.ptr.p_double[i];
26247 : }
26248 0 : for(i=0; i<=nout-1; i++)
26249 : {
26250 0 : for(j=0; j<=nout-1; j++)
26251 : {
26252 0 : if( j==i )
26253 : {
26254 0 : deidyj = t*network->nwbuf.ptr.p_double[i]*(s-network->nwbuf.ptr.p_double[i])/ae_sqr(s, _state);
26255 0 : ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+i][0], 1, ae_v_len(0,wcount-1), deidyj);
26256 : }
26257 : else
26258 : {
26259 0 : deidyj = -t*network->nwbuf.ptr.p_double[i]*network->nwbuf.ptr.p_double[j]/ae_sqr(s, _state);
26260 0 : ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+j][0], 1, ae_v_len(0,wcount-1), deidyj);
26261 : }
26262 : }
26263 : }
26264 : }
26265 : else
26266 : {
26267 :
26268 : /*
26269 : * For a softmax + squared error we have an expression
26270 : * far beyond human imagination, so we don't even try
26271 : * to comment on it. Just enjoy the code...
26272 : *
26273 : * P.S. That's why "natural error" is called "natural" -
26274 : * compact, beautiful expressions, fast code...
26275 : */
26276 0 : mx = network->neurons.ptr.p_double[ntotal-nout];
26277 0 : for(i=0; i<=nout-1; i++)
26278 : {
26279 0 : mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
26280 : }
26281 0 : s = (double)(0);
26282 0 : s2 = (double)(0);
26283 0 : for(i=0; i<=nout-1; i++)
26284 : {
26285 0 : network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
26286 0 : s = s+network->nwbuf.ptr.p_double[i];
26287 0 : s2 = s2+ae_sqr(network->nwbuf.ptr.p_double[i], _state);
26288 : }
26289 0 : q = (double)(0);
26290 0 : for(i=0; i<=nout-1; i++)
26291 : {
26292 0 : q = q+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])*network->nwbuf.ptr.p_double[i];
26293 : }
26294 0 : for(i=0; i<=nout-1; i++)
26295 : {
26296 0 : z = -q+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])*s;
26297 0 : expi = network->nwbuf.ptr.p_double[i];
26298 0 : for(j=0; j<=nout-1; j++)
26299 : {
26300 0 : expj = network->nwbuf.ptr.p_double[j];
26301 0 : if( j==i )
26302 : {
26303 0 : deidyj = expi/ae_sqr(s, _state)*((z+expi)*(s-2*expi)/s+expi*s2/ae_sqr(s, _state));
26304 : }
26305 : else
26306 : {
26307 0 : deidyj = expi*expj/ae_sqr(s, _state)*(s2/ae_sqr(s, _state)-2*z/s-(expi+expj)/s+(network->y.ptr.p_double[i]-desiredy.ptr.p_double[i])-(network->y.ptr.p_double[j]-desiredy.ptr.p_double[j]));
26308 : }
26309 0 : ae_v_addd(&rdy.ptr.pp_double[ntotal-nout+i][0], 1, &ry.ptr.pp_double[ntotal-nout+j][0], 1, ae_v_len(0,wcount-1), deidyj);
26310 : }
26311 : }
26312 : }
26313 : }
26314 :
26315 : /*
26316 : * Hessian. Backward pass of the R-algorithm
26317 : *
26318 : * Stage 2. Process.
26319 : */
26320 0 : for(i=ntotal-1; i>=0; i--)
26321 : {
26322 :
26323 : /*
26324 : * Possible variants:
26325 : * 1. Activation function
26326 : * 2. Adaptive summator
26327 : * 3. Special neuron
26328 : */
26329 0 : offs = istart+i*mlpbase_nfieldwidth;
26330 0 : if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
26331 : {
26332 0 : n1 = network->structinfo.ptr.p_int[offs+2];
26333 :
26334 : /*
26335 : * First, calculate R(dE/dX).
26336 : */
26337 0 : mlpactivationfunction(network->neurons.ptr.p_double[n1], network->structinfo.ptr.p_int[offs+0], &f, &df, &d2f, _state);
26338 0 : v = d2f*network->derror.ptr.p_double[i];
26339 0 : ae_v_moved(&rdx.ptr.pp_double[i][0], 1, &rdy.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), df);
26340 0 : ae_v_addd(&rdx.ptr.pp_double[i][0], 1, &rx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
26341 :
26342 : /*
26343 : * No R(dE/dWij) is needed since weight of activation neuron
26344 : * is fixed to 1.
26345 : *
26346 : * So we can update R(dE/dY) for the connected neuron.
26347 : * (note that Vij=0, Wij=1)
26348 : */
26349 0 : ae_v_add(&rdy.ptr.pp_double[n1][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
26350 0 : continue;
26351 : }
26352 0 : if( network->structinfo.ptr.p_int[offs+0]==0 )
26353 : {
26354 :
26355 : /*
26356 : * Adaptive summator
26357 : */
26358 0 : n1 = network->structinfo.ptr.p_int[offs+2];
26359 0 : n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
26360 0 : w1 = network->structinfo.ptr.p_int[offs+3];
26361 0 : w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
26362 :
26363 : /*
26364 : * First, calculate R(dE/dX).
26365 : */
26366 0 : ae_v_move(&rdx.ptr.pp_double[i][0], 1, &rdy.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
26367 :
26368 : /*
26369 : * Then, calculate R(dE/dWij)
26370 : */
26371 0 : for(j=w1; j<=w2; j++)
26372 : {
26373 0 : v = network->neurons.ptr.p_double[n1+j-w1];
26374 0 : ae_v_addd(&h->ptr.pp_double[j][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
26375 0 : v = network->derror.ptr.p_double[i];
26376 0 : ae_v_addd(&h->ptr.pp_double[j][0], 1, &ry.ptr.pp_double[n1+j-w1][0], 1, ae_v_len(0,wcount-1), v);
26377 : }
26378 :
26379 : /*
26380 : * And finally, update R(dE/dY) for connected neurons.
26381 : */
26382 0 : for(j=w1; j<=w2; j++)
26383 : {
26384 0 : v = network->weights.ptr.p_double[j];
26385 0 : ae_v_addd(&rdy.ptr.pp_double[n1+j-w1][0], 1, &rdx.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1), v);
26386 0 : rdy.ptr.pp_double[n1+j-w1][j] = rdy.ptr.pp_double[n1+j-w1][j]+network->derror.ptr.p_double[i];
26387 : }
26388 0 : continue;
26389 : }
26390 0 : if( network->structinfo.ptr.p_int[offs+0]<0 )
26391 : {
26392 0 : bflag = ae_false;
26393 0 : if( (network->structinfo.ptr.p_int[offs+0]==-2||network->structinfo.ptr.p_int[offs+0]==-3)||network->structinfo.ptr.p_int[offs+0]==-4 )
26394 : {
26395 :
26396 : /*
26397 : * Special neuron type, no back-propagation required
26398 : */
26399 0 : bflag = ae_true;
26400 : }
26401 0 : ae_assert(bflag, "MLPHessianNBatch: unknown neuron type!", _state);
26402 0 : continue;
26403 : }
26404 : }
26405 : }
26406 0 : ae_frame_leave(_state);
26407 0 : }
26408 :
26409 :
26410 : /*************************************************************************
26411 : Internal subroutine
26412 :
26413 : Network must be processed by MLPProcess on X
26414 : *************************************************************************/
26415 0 : static void mlpbase_mlpinternalcalculategradient(multilayerperceptron* network,
26416 : /* Real */ ae_vector* neurons,
26417 : /* Real */ ae_vector* weights,
26418 : /* Real */ ae_vector* derror,
26419 : /* Real */ ae_vector* grad,
26420 : ae_bool naturalerrorfunc,
26421 : ae_state *_state)
26422 : {
26423 : ae_int_t i;
26424 : ae_int_t n1;
26425 : ae_int_t n2;
26426 : ae_int_t w1;
26427 : ae_int_t w2;
26428 : ae_int_t ntotal;
26429 : ae_int_t istart;
26430 : ae_int_t nin;
26431 : ae_int_t nout;
26432 : ae_int_t offs;
26433 : double dedf;
26434 : double dfdnet;
26435 : double v;
26436 : double fown;
26437 : double deown;
26438 : double net;
26439 : double mx;
26440 : ae_bool bflag;
26441 :
26442 :
26443 :
26444 : /*
26445 : * Read network geometry
26446 : */
26447 0 : nin = network->structinfo.ptr.p_int[1];
26448 0 : nout = network->structinfo.ptr.p_int[2];
26449 0 : ntotal = network->structinfo.ptr.p_int[3];
26450 0 : istart = network->structinfo.ptr.p_int[5];
26451 :
26452 : /*
26453 : * Pre-processing of dError/dOut:
26454 : * from dError/dOut(normalized) to dError/dOut(non-normalized)
26455 : */
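           :     /*
           :      * Sketch of the algebra (with P[i]=Exp[i]/Net denoting the softmax-
           :      * normalized output): the conversion below is the standard softmax
           :      * Jacobian-vector product
           :      *
           :      *     dE/dNET[i] = P[i]*( dE/dP[i] - Sum_j dE/dP[j]*P[j] ),
           :      *
           :      * and the expression (-V+DEOwn*FOwn+DEOwn*(Net-FOwn))*FOwn/Sqr(Net)
           :      * computed in the loop simplifies to exactly this quantity.
           :      */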
26456 0 : ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPInternalCalculateGradient: unknown normalization type!", _state);
26457 0 : if( network->structinfo.ptr.p_int[6]==1 )
26458 : {
26459 :
26460 : /*
26461 : * Softmax
26462 : */
26463 0 : if( !naturalerrorfunc )
26464 : {
26465 0 : mx = network->neurons.ptr.p_double[ntotal-nout];
26466 0 : for(i=0; i<=nout-1; i++)
26467 : {
26468 0 : mx = ae_maxreal(mx, network->neurons.ptr.p_double[ntotal-nout+i], _state);
26469 : }
26470 0 : net = (double)(0);
26471 0 : for(i=0; i<=nout-1; i++)
26472 : {
26473 0 : network->nwbuf.ptr.p_double[i] = ae_exp(network->neurons.ptr.p_double[ntotal-nout+i]-mx, _state);
26474 0 : net = net+network->nwbuf.ptr.p_double[i];
26475 : }
26476 0 : v = ae_v_dotproduct(&network->derror.ptr.p_double[ntotal-nout], 1, &network->nwbuf.ptr.p_double[0], 1, ae_v_len(ntotal-nout,ntotal-1));
26477 0 : for(i=0; i<=nout-1; i++)
26478 : {
26479 0 : fown = network->nwbuf.ptr.p_double[i];
26480 0 : deown = network->derror.ptr.p_double[ntotal-nout+i];
26481 0 : network->nwbuf.ptr.p_double[nout+i] = (-v+deown*fown+deown*(net-fown))*fown/ae_sqr(net, _state);
26482 : }
26483 0 : for(i=0; i<=nout-1; i++)
26484 : {
26485 0 : network->derror.ptr.p_double[ntotal-nout+i] = network->nwbuf.ptr.p_double[nout+i];
26486 : }
26487 : }
26488 : }
26489 : else
26490 : {
26491 :
26492 : /*
26493 : * Un-standardisation
26494 : */
26495 0 : for(i=0; i<=nout-1; i++)
26496 : {
26497 0 : network->derror.ptr.p_double[ntotal-nout+i] = network->derror.ptr.p_double[ntotal-nout+i]*network->columnsigmas.ptr.p_double[nin+i];
26498 : }
26499 : }
26500 :
26501 : /*
26502 : * Backpropagation
26503 : */
26504 0 : for(i=ntotal-1; i>=0; i--)
26505 : {
26506 :
26507 : /*
26508 : * Extract info
26509 : */
26510 0 : offs = istart+i*mlpbase_nfieldwidth;
26511 0 : if( network->structinfo.ptr.p_int[offs+0]>0||network->structinfo.ptr.p_int[offs+0]==-5 )
26512 : {
26513 :
26514 : /*
26515 : * Activation function
26516 : */
26517 0 : dedf = network->derror.ptr.p_double[i];
26518 0 : dfdnet = network->dfdnet.ptr.p_double[i];
26519 0 : derror->ptr.p_double[network->structinfo.ptr.p_int[offs+2]] = derror->ptr.p_double[network->structinfo.ptr.p_int[offs+2]]+dedf*dfdnet;
26520 0 : continue;
26521 : }
26522 0 : if( network->structinfo.ptr.p_int[offs+0]==0 )
26523 : {
26524 :
26525 : /*
26526 : * Adaptive summator
26527 : */
26528 0 : n1 = network->structinfo.ptr.p_int[offs+2];
26529 0 : n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
26530 0 : w1 = network->structinfo.ptr.p_int[offs+3];
26531 0 : w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
26532 0 : dedf = network->derror.ptr.p_double[i];
26533 0 : dfdnet = 1.0;
26534 0 : v = dedf*dfdnet;
26535 0 : ae_v_moved(&grad->ptr.p_double[w1], 1, &neurons->ptr.p_double[n1], 1, ae_v_len(w1,w2), v);
26536 0 : ae_v_addd(&derror->ptr.p_double[n1], 1, &weights->ptr.p_double[w1], 1, ae_v_len(n1,n2), v);
26537 0 : continue;
26538 : }
26539 0 : if( network->structinfo.ptr.p_int[offs+0]<0 )
26540 : {
26541 0 : bflag = ae_false;
26542 0 : if( (network->structinfo.ptr.p_int[offs+0]==-2||network->structinfo.ptr.p_int[offs+0]==-3)||network->structinfo.ptr.p_int[offs+0]==-4 )
26543 : {
26544 :
26545 : /*
26546 : * Special neuron type, no back-propagation required
26547 : */
26548 0 : bflag = ae_true;
26549 : }
26550 0 : ae_assert(bflag, "MLPInternalCalculateGradient: unknown neuron type!", _state);
26551 0 : continue;
26552 : }
26553 : }
26554 0 : }
26555 :
26556 :
26557 0 : static void mlpbase_mlpchunkedgradient(multilayerperceptron* network,
26558 : /* Real */ ae_matrix* xy,
26559 : ae_int_t cstart,
26560 : ae_int_t csize,
26561 : /* Real */ ae_vector* batch4buf,
26562 : /* Real */ ae_vector* hpcbuf,
26563 : double* e,
26564 : ae_bool naturalerrorfunc,
26565 : ae_state *_state)
26566 : {
26567 : ae_int_t i;
26568 : ae_int_t j;
26569 : ae_int_t k;
26570 : ae_int_t kl;
26571 : ae_int_t ntotal;
26572 : ae_int_t nin;
26573 : ae_int_t nout;
26574 : ae_int_t offs;
26575 : double f;
26576 : double df;
26577 : double d2f;
26578 : double v;
26579 : double vv;
26580 : double s;
26581 : double fown;
26582 : double deown;
26583 : ae_bool bflag;
26584 : ae_int_t istart;
26585 : ae_int_t entrysize;
26586 : ae_int_t dfoffs;
26587 : ae_int_t derroroffs;
26588 : ae_int_t entryoffs;
26589 : ae_int_t neuronidx;
26590 : ae_int_t srcentryoffs;
26591 : ae_int_t srcneuronidx;
26592 : ae_int_t srcweightidx;
26593 : ae_int_t neurontype;
26594 : ae_int_t nweights;
26595 : ae_int_t offs0;
26596 : ae_int_t offs1;
26597 : ae_int_t offs2;
26598 : double v0;
26599 : double v1;
26600 : double v2;
26601 : double v3;
26602 : double s0;
26603 : double s1;
26604 : double s2;
26605 : double s3;
26606 : ae_int_t chunksize;
26607 :
26608 :
26609 0 : chunksize = 4;
26610 0 : ae_assert(csize<=chunksize, "MLPChunkedGradient: internal error (CSize>ChunkSize)", _state);
26611 :
26612 : /*
26613 : * Try to use HPC core, if possible
26614 : */
26615 0 : if( hpcchunkedgradient(&network->weights, &network->structinfo, &network->columnmeans, &network->columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc, _state) )
26616 : {
26617 0 : return;
26618 : }
26619 :
26620 : /*
26621 : * Read network geometry, prepare data
26622 : */
26623 0 : nin = network->structinfo.ptr.p_int[1];
26624 0 : nout = network->structinfo.ptr.p_int[2];
26625 0 : ntotal = network->structinfo.ptr.p_int[3];
26626 0 : istart = network->structinfo.ptr.p_int[5];
26627 0 : entrysize = 12;
26628 0 : dfoffs = 4;
26629 0 : derroroffs = 8;
26630 :
26631 : /*
26632 : * Fill Batch4Buf by zeros.
26633 : *
26634 : * THIS STAGE IS VERY IMPORTANT!
26635 : *
26637 26636 :      * We fill all components of each entry - neuron values, dF/dNET, dError/dF.
26637 : * It allows us to easily handle situations when CSize<ChunkSize by
26638 : * simply working with ALL components of Batch4Buf, without ever
26639 : * looking at CSize. The idea is that dError/dF for absent components
26640 : * will be initialized by zeros - and won't be rewritten by non-zero
26641 : * values during backpropagation.
26642 : */
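           :     /*
           :      * Example: with CSize=3, the fourth slot of every entry keeps a zero
           :      * dError/dF through both passes, so the padding column contributes
           :      * exactly zero to every accumulated gradient term.
           :      */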
26643 0 : for(i=0; i<=entrysize*ntotal-1; i++)
26644 : {
26645 0 : batch4buf->ptr.p_double[i] = (double)(0);
26646 : }
26647 :
26648 : /*
26649 : * Forward pass:
26650 : * 1. Load data into Batch4Buf. If CSize<ChunkSize, data are padded by zeros.
26651 : * 2. Perform forward pass through network
26652 : */
26653 0 : for(i=0; i<=nin-1; i++)
26654 : {
26655 0 : entryoffs = entrysize*i;
26656 0 : for(j=0; j<=csize-1; j++)
26657 : {
26658 0 : if( ae_fp_neq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
26659 : {
26660 0 : batch4buf->ptr.p_double[entryoffs+j] = (xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i])/network->columnsigmas.ptr.p_double[i];
26661 : }
26662 : else
26663 : {
26664 0 : batch4buf->ptr.p_double[entryoffs+j] = xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i];
26665 : }
26666 : }
26667 : }
26668 0 : for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
26669 : {
26670 0 : entryoffs = entrysize*neuronidx;
26671 0 : offs = istart+neuronidx*mlpbase_nfieldwidth;
26672 0 : neurontype = network->structinfo.ptr.p_int[offs+0];
26673 0 : if( neurontype>0||neurontype==-5 )
26674 : {
26675 :
26676 : /*
26677 : * "activation function" neuron, which takes value of neuron SrcNeuronIdx
26678 : * and applies activation function to it.
26679 : *
26680 : * This neuron has no weights and no tunable parameters.
26681 : */
26682 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
26683 0 : srcentryoffs = entrysize*srcneuronidx;
26684 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+0], neurontype, &f, &df, &d2f, _state);
26685 0 : batch4buf->ptr.p_double[entryoffs+0] = f;
26686 0 : batch4buf->ptr.p_double[entryoffs+0+dfoffs] = df;
26687 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+1], neurontype, &f, &df, &d2f, _state);
26688 0 : batch4buf->ptr.p_double[entryoffs+1] = f;
26689 0 : batch4buf->ptr.p_double[entryoffs+1+dfoffs] = df;
26690 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+2], neurontype, &f, &df, &d2f, _state);
26691 0 : batch4buf->ptr.p_double[entryoffs+2] = f;
26692 0 : batch4buf->ptr.p_double[entryoffs+2+dfoffs] = df;
26693 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+3], neurontype, &f, &df, &d2f, _state);
26694 0 : batch4buf->ptr.p_double[entryoffs+3] = f;
26695 0 : batch4buf->ptr.p_double[entryoffs+3+dfoffs] = df;
26696 0 : continue;
26697 : }
26698 0 : if( neurontype==0 )
26699 : {
26700 :
26701 : /*
26702 : * "adaptive summator" neuron, whose output is a weighted sum of inputs.
26703 : * It has weights, but has no activation function.
26704 : */
26705 0 : nweights = network->structinfo.ptr.p_int[offs+1];
26706 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
26707 0 : srcentryoffs = entrysize*srcneuronidx;
26708 0 : srcweightidx = network->structinfo.ptr.p_int[offs+3];
26709 0 : v0 = (double)(0);
26710 0 : v1 = (double)(0);
26711 0 : v2 = (double)(0);
26712 0 : v3 = (double)(0);
26713 0 : for(j=0; j<=nweights-1; j++)
26714 : {
26715 0 : v = network->weights.ptr.p_double[srcweightidx];
26716 0 : srcweightidx = srcweightidx+1;
26717 0 : v0 = v0+v*batch4buf->ptr.p_double[srcentryoffs+0];
26718 0 : v1 = v1+v*batch4buf->ptr.p_double[srcentryoffs+1];
26719 0 : v2 = v2+v*batch4buf->ptr.p_double[srcentryoffs+2];
26720 0 : v3 = v3+v*batch4buf->ptr.p_double[srcentryoffs+3];
26721 0 : srcentryoffs = srcentryoffs+entrysize;
26722 : }
26723 0 : batch4buf->ptr.p_double[entryoffs+0] = v0;
26724 0 : batch4buf->ptr.p_double[entryoffs+1] = v1;
26725 0 : batch4buf->ptr.p_double[entryoffs+2] = v2;
26726 0 : batch4buf->ptr.p_double[entryoffs+3] = v3;
26727 0 : batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(1);
26728 0 : batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(1);
26729 0 : batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(1);
26730 0 : batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(1);
26731 0 : continue;
26732 : }
26733 0 : if( neurontype<0 )
26734 : {
26735 0 : bflag = ae_false;
26736 0 : if( neurontype==-2 )
26737 : {
26738 :
26739 : /*
26740 : * Input neuron, left unchanged
26741 : */
26742 0 : bflag = ae_true;
26743 : }
26744 0 : if( neurontype==-3 )
26745 : {
26746 :
26747 : /*
26748 : * "-1" neuron
26749 : */
26750 0 : batch4buf->ptr.p_double[entryoffs+0] = (double)(-1);
26751 0 : batch4buf->ptr.p_double[entryoffs+1] = (double)(-1);
26752 0 : batch4buf->ptr.p_double[entryoffs+2] = (double)(-1);
26753 0 : batch4buf->ptr.p_double[entryoffs+3] = (double)(-1);
26754 0 : batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(0);
26755 0 : batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(0);
26756 0 : batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(0);
26757 0 : batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(0);
26758 0 : bflag = ae_true;
26759 : }
26760 0 : if( neurontype==-4 )
26761 : {
26762 :
26763 : /*
26764 : * "0" neuron
26765 : */
26766 0 : batch4buf->ptr.p_double[entryoffs+0] = (double)(0);
26767 0 : batch4buf->ptr.p_double[entryoffs+1] = (double)(0);
26768 0 : batch4buf->ptr.p_double[entryoffs+2] = (double)(0);
26769 0 : batch4buf->ptr.p_double[entryoffs+3] = (double)(0);
26770 0 : batch4buf->ptr.p_double[entryoffs+0+dfoffs] = (double)(0);
26771 0 : batch4buf->ptr.p_double[entryoffs+1+dfoffs] = (double)(0);
26772 0 : batch4buf->ptr.p_double[entryoffs+2+dfoffs] = (double)(0);
26773 0 : batch4buf->ptr.p_double[entryoffs+3+dfoffs] = (double)(0);
26774 0 : bflag = ae_true;
26775 : }
26776 0 : ae_assert(bflag, "MLPChunkedGradient: internal error - unknown neuron type!", _state);
26777 0 : continue;
26778 : }
26779 : }
26780 :
26781 : /*
26782 : * Intermediate phase between forward and backward passes.
26783 : *
26784 : * For regression networks:
26785 : * * forward pass is completely done (no additional post-processing is
26786 : * needed).
26787 : * * before starting backward pass, we have to calculate dError/dOut
26788 : * for output neurons. We also update error at this phase.
26789 : *
26790 : * For classification networks:
26791 : * * in addition to forward pass we apply SOFTMAX normalization to
26792 : * output neurons.
26793 : * * after applying normalization, we have to calculate dError/dOut,
26794 : * which is calculated in two steps:
26795 : * * first, we calculate derivative of error with respect to SOFTMAX
26796 : * normalized outputs (normalized dError)
26797 : * * then, we calculate derivative of error with respect to values
26798 : * of outputs BEFORE normalization was applied to them
26800 26799 :      *     of outputs BEFORE normalization was applied to them
           :      */
26800 0 : ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPChunkedGradient: unknown normalization type!", _state);
26801 0 : if( network->structinfo.ptr.p_int[6]==1 )
26802 : {
26803 :
26804 : /*
26805 : * SOFTMAX-normalized network.
26806 : *
26807 : * First, calculate (V0,V1,V2,V3) - component-wise maximum
26808 : * of output neurons. This vector of maximum values will be
26809 : * used for normalization of outputs prior to calculating
26810 : * exponentials.
26811 : *
26812 : * NOTE: the only purpose of this stage is to prevent overflow
26813 : * during calculation of exponentials. With this stage
26814 : * we make sure that all exponentials are calculated
26815 : * with non-positive argument. If you load (0,0,0,0) to
26816 : * (V0,V1,V2,V3), your program will continue working -
26817 : * although with less robustness.
26818 : */
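           :         /*
           :          * The shift is harmless because softmax is invariant under it:
           :          *
           :          *     Exp(z[i]-M)/Sum_j Exp(z[j]-M) = Exp(z[i])/Sum_j Exp(z[j]);
           :          *
           :          * subtracting the component-wise maximum M merely keeps every
           :          * argument of Exp() non-positive.
           :          */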
26819 0 : entryoffs = entrysize*(ntotal-nout);
26820 0 : v0 = batch4buf->ptr.p_double[entryoffs+0];
26821 0 : v1 = batch4buf->ptr.p_double[entryoffs+1];
26822 0 : v2 = batch4buf->ptr.p_double[entryoffs+2];
26823 0 : v3 = batch4buf->ptr.p_double[entryoffs+3];
26824 0 : entryoffs = entryoffs+entrysize;
26825 0 : for(i=1; i<=nout-1; i++)
26826 : {
26827 0 : v = batch4buf->ptr.p_double[entryoffs+0];
26828 0 : if( v>v0 )
26829 : {
26830 0 : v0 = v;
26831 : }
26832 0 : v = batch4buf->ptr.p_double[entryoffs+1];
26833 0 : if( v>v1 )
26834 : {
26835 0 : v1 = v;
26836 : }
26837 0 : v = batch4buf->ptr.p_double[entryoffs+2];
26838 0 : if( v>v2 )
26839 : {
26840 0 : v2 = v;
26841 : }
26842 0 : v = batch4buf->ptr.p_double[entryoffs+3];
26843 0 : if( v>v3 )
26844 : {
26845 0 : v3 = v;
26846 : }
26847 0 : entryoffs = entryoffs+entrysize;
26848 : }
26849 :
26850 : /*
26851 : * Then, calculate exponentials and place them to part of the
26852 : * array which is located past the last entry. We also
26853 : * calculate sum of exponentials which will be stored past the
26854 : * exponentials.
26855 : */
26856 0 : entryoffs = entrysize*(ntotal-nout);
26857 0 : offs0 = entrysize*ntotal;
26858 0 : s0 = (double)(0);
26859 0 : s1 = (double)(0);
26860 0 : s2 = (double)(0);
26861 0 : s3 = (double)(0);
26862 0 : for(i=0; i<=nout-1; i++)
26863 : {
26864 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+0]-v0, _state);
26865 0 : s0 = s0+v;
26866 0 : batch4buf->ptr.p_double[offs0+0] = v;
26867 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+1]-v1, _state);
26868 0 : s1 = s1+v;
26869 0 : batch4buf->ptr.p_double[offs0+1] = v;
26870 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+2]-v2, _state);
26871 0 : s2 = s2+v;
26872 0 : batch4buf->ptr.p_double[offs0+2] = v;
26873 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+3]-v3, _state);
26874 0 : s3 = s3+v;
26875 0 : batch4buf->ptr.p_double[offs0+3] = v;
26876 0 : entryoffs = entryoffs+entrysize;
26877 0 : offs0 = offs0+chunksize;
26878 : }
26879 0 : offs0 = entrysize*ntotal+2*nout*chunksize;
26880 0 : batch4buf->ptr.p_double[offs0+0] = s0;
26881 0 : batch4buf->ptr.p_double[offs0+1] = s1;
26882 0 : batch4buf->ptr.p_double[offs0+2] = s2;
26883 0 : batch4buf->ptr.p_double[offs0+3] = s3;
26884 :
26885 : /*
26886 : * Now we have:
26887 : * * Batch4Buf[0...EntrySize*NTotal-1] stores:
26888 : * * NTotal*ChunkSize neuron output values (SOFTMAX normalization
26889 : * was not applied to these values),
26890 : * * NTotal*ChunkSize values of dF/dNET (derivative of neuron
26891 : * output with respect to its input)
26892 : * * NTotal*ChunkSize zeros in the elements which correspond to
26893 : * dError/dOut (derivative of error with respect to neuron output).
26894 : * * Batch4Buf[EntrySize*NTotal...EntrySize*NTotal+ChunkSize*NOut-1] -
26895 : * stores exponentials of last NOut neurons.
26897 26896 :      *   Batch4Buf[EntrySize*NTotal+ChunkSize*NOut...EntrySize*NTotal+ChunkSize*2*NOut-1]
26897 : * - can be used for temporary calculations
26898 : * * Batch4Buf[EntrySize*NTotal+ChunkSize*2*NOut...EntrySize*NTotal+ChunkSize*2*NOut+ChunkSize-1]
26899 : * - stores sum-of-exponentials
26900 : *
26901 : * Block below calculates derivatives of error function with respect
26902 : * to non-SOFTMAX-normalized output values of last NOut neurons.
26903 : *
26905 26904 :      * It is quite complicated; we do not describe the algebra behind it,
26905 : * but if you want you may check it yourself :)
26906 : */
26907 0 : if( naturalerrorfunc )
26908 : {
26909 :
26910 : /*
26911 : * Calculate derivative of error with respect to values of
26912 : * output neurons PRIOR TO SOFTMAX NORMALIZATION. Because we
26914 26913 :              * use the natural error function (cross-entropy), we can do so
26915 26914 :              * very easily.
26915 : */
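           :             /*
           :              * For cross-entropy with a one-hot target T[i], the derivative
           :              * with respect to pre-softmax outputs collapses to P[i]-T[i],
           :              * which is exactly the VV/S-V value stored below.
           :              */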
26916 0 : offs0 = entrysize*ntotal+2*nout*chunksize;
26917 0 : for(k=0; k<=csize-1; k++)
26918 : {
26919 0 : s = batch4buf->ptr.p_double[offs0+k];
26920 0 : kl = ae_round(xy->ptr.pp_double[cstart+k][nin], _state);
26921 0 : offs1 = (ntotal-nout)*entrysize+derroroffs+k;
26922 0 : offs2 = entrysize*ntotal+k;
26923 0 : for(i=0; i<=nout-1; i++)
26924 : {
26925 0 : if( i==kl )
26926 : {
26927 0 : v = (double)(1);
26928 : }
26929 : else
26930 : {
26931 0 : v = (double)(0);
26932 : }
26933 0 : vv = batch4buf->ptr.p_double[offs2];
26934 0 : batch4buf->ptr.p_double[offs1] = vv/s-v;
26935 0 : *e = *e+mlpbase_safecrossentropy(v, vv/s, _state);
26936 0 : offs1 = offs1+entrysize;
26937 0 : offs2 = offs2+chunksize;
26938 : }
26939 : }
26940 : }
26941 : else
26942 : {
26943 :
26944 : /*
26945 : * SOFTMAX normalization makes things very difficult.
26946 : * Sorry, we do not dare to describe this esoteric math
26947 : * in details.
26949 26948 :              * in detail.
26949 0 : offs0 = entrysize*ntotal+chunksize*2*nout;
26950 0 : for(k=0; k<=csize-1; k++)
26951 : {
26952 0 : s = batch4buf->ptr.p_double[offs0+k];
26953 0 : kl = ae_round(xy->ptr.pp_double[cstart+k][nin], _state);
26954 0 : vv = (double)(0);
26955 0 : offs1 = entrysize*ntotal+k;
26956 0 : offs2 = entrysize*ntotal+nout*chunksize+k;
26957 0 : for(i=0; i<=nout-1; i++)
26958 : {
26959 0 : fown = batch4buf->ptr.p_double[offs1];
26960 0 : if( i==kl )
26961 : {
26962 0 : deown = fown/s-1;
26963 : }
26964 : else
26965 : {
26966 0 : deown = fown/s;
26967 : }
26968 0 : batch4buf->ptr.p_double[offs2] = deown;
26969 0 : vv = vv+deown*fown;
26970 0 : *e = *e+deown*deown/2;
26971 0 : offs1 = offs1+chunksize;
26972 0 : offs2 = offs2+chunksize;
26973 : }
26974 0 : offs1 = entrysize*ntotal+k;
26975 0 : offs2 = entrysize*ntotal+nout*chunksize+k;
26976 0 : for(i=0; i<=nout-1; i++)
26977 : {
26978 0 : fown = batch4buf->ptr.p_double[offs1];
26979 0 : deown = batch4buf->ptr.p_double[offs2];
26980 0 : batch4buf->ptr.p_double[(ntotal-nout+i)*entrysize+derroroffs+k] = (-vv+deown*fown+deown*(s-fown))*fown/ae_sqr(s, _state);
26981 0 : offs1 = offs1+chunksize;
26982 0 : offs2 = offs2+chunksize;
26983 : }
26984 : }
26985 : }
26986 : }
26987 : else
26988 : {
26989 :
26990 : /*
26991 : * Regression network with sum-of-squares function.
26992 : *
26994 26993 :      * For each of the last NOut neurons:
26994 : * * calculate difference between actual and desired output
26995 : * * calculate dError/dOut for this neuron (proportional to difference)
26997 26996 :      * * store it in the last 4 components of the entry (these values are used
26997 : * to start backpropagation)
26998 : * * update error
26999 : */
27000 0 : for(i=0; i<=nout-1; i++)
27001 : {
27002 0 : v0 = network->columnsigmas.ptr.p_double[nin+i];
27003 0 : v1 = network->columnmeans.ptr.p_double[nin+i];
27004 0 : entryoffs = entrysize*(ntotal-nout+i);
27005 0 : offs0 = entryoffs;
27006 0 : offs1 = entryoffs+derroroffs;
27007 0 : for(j=0; j<=csize-1; j++)
27008 : {
27009 0 : v = batch4buf->ptr.p_double[offs0+j]*v0+v1-xy->ptr.pp_double[cstart+j][nin+i];
27010 0 : batch4buf->ptr.p_double[offs1+j] = v*v0;
27011 0 : *e = *e+v*v/2;
27012 : }
27013 : }
27014 : }
27015 :
27016 : /*
27017 : * Backpropagation
27018 : */
27019 0 : for(neuronidx=ntotal-1; neuronidx>=0; neuronidx--)
27020 : {
27021 0 : entryoffs = entrysize*neuronidx;
27022 0 : offs = istart+neuronidx*mlpbase_nfieldwidth;
27023 0 : neurontype = network->structinfo.ptr.p_int[offs+0];
27024 0 : if( neurontype>0||neurontype==-5 )
27025 : {
27026 :
27027 : /*
27028 : * Activation function
27029 : */
27030 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
27031 0 : srcentryoffs = entrysize*srcneuronidx;
27032 0 : offs0 = srcentryoffs+derroroffs;
27033 0 : offs1 = entryoffs+derroroffs;
27034 0 : offs2 = entryoffs+dfoffs;
27035 0 : batch4buf->ptr.p_double[offs0+0] = batch4buf->ptr.p_double[offs0+0]+batch4buf->ptr.p_double[offs1+0]*batch4buf->ptr.p_double[offs2+0];
27036 0 : batch4buf->ptr.p_double[offs0+1] = batch4buf->ptr.p_double[offs0+1]+batch4buf->ptr.p_double[offs1+1]*batch4buf->ptr.p_double[offs2+1];
27037 0 : batch4buf->ptr.p_double[offs0+2] = batch4buf->ptr.p_double[offs0+2]+batch4buf->ptr.p_double[offs1+2]*batch4buf->ptr.p_double[offs2+2];
27038 0 : batch4buf->ptr.p_double[offs0+3] = batch4buf->ptr.p_double[offs0+3]+batch4buf->ptr.p_double[offs1+3]*batch4buf->ptr.p_double[offs2+3];
27039 0 : continue;
27040 : }
27041 0 : if( neurontype==0 )
27042 : {
27043 :
27044 : /*
27045 : * Adaptive summator
27046 : */
27047 0 : nweights = network->structinfo.ptr.p_int[offs+1];
27048 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
27049 0 : srcentryoffs = entrysize*srcneuronidx;
27050 0 : srcweightidx = network->structinfo.ptr.p_int[offs+3];
27051 0 : v0 = batch4buf->ptr.p_double[entryoffs+derroroffs+0];
27052 0 : v1 = batch4buf->ptr.p_double[entryoffs+derroroffs+1];
27053 0 : v2 = batch4buf->ptr.p_double[entryoffs+derroroffs+2];
27054 0 : v3 = batch4buf->ptr.p_double[entryoffs+derroroffs+3];
27055 0 : for(j=0; j<=nweights-1; j++)
27056 : {
27057 0 : offs0 = srcentryoffs;
27058 0 : offs1 = srcentryoffs+derroroffs;
27059 0 : v = network->weights.ptr.p_double[srcweightidx];
27060 0 : hpcbuf->ptr.p_double[srcweightidx] = hpcbuf->ptr.p_double[srcweightidx]+batch4buf->ptr.p_double[offs0+0]*v0+batch4buf->ptr.p_double[offs0+1]*v1+batch4buf->ptr.p_double[offs0+2]*v2+batch4buf->ptr.p_double[offs0+3]*v3;
27061 0 : batch4buf->ptr.p_double[offs1+0] = batch4buf->ptr.p_double[offs1+0]+v*v0;
27062 0 : batch4buf->ptr.p_double[offs1+1] = batch4buf->ptr.p_double[offs1+1]+v*v1;
27063 0 : batch4buf->ptr.p_double[offs1+2] = batch4buf->ptr.p_double[offs1+2]+v*v2;
27064 0 : batch4buf->ptr.p_double[offs1+3] = batch4buf->ptr.p_double[offs1+3]+v*v3;
27065 0 : srcentryoffs = srcentryoffs+entrysize;
27066 0 : srcweightidx = srcweightidx+1;
27067 : }
27068 0 : continue;
27069 : }
27070 0 : if( neurontype<0 )
27071 : {
27072 0 : bflag = ae_false;
27073 0 : if( (neurontype==-2||neurontype==-3)||neurontype==-4 )
27074 : {
27075 :
27076 : /*
27077 : * Special neuron type, no back-propagation required
27078 : */
27079 0 : bflag = ae_true;
27080 : }
27082 27081 0 :             ae_assert(bflag, "MLPChunkedGradient: unknown neuron type!", _state);
27082 0 : continue;
27083 : }
27084 : }
27085 : }
27086 :
27087 :
27088 0 : static void mlpbase_mlpchunkedprocess(multilayerperceptron* network,
27089 : /* Real */ ae_matrix* xy,
27090 : ae_int_t cstart,
27091 : ae_int_t csize,
27092 : /* Real */ ae_vector* batch4buf,
27093 : /* Real */ ae_vector* hpcbuf,
27094 : ae_state *_state)
27095 : {
27096 : ae_int_t i;
27097 : ae_int_t j;
27098 : ae_int_t ntotal;
27099 : ae_int_t nin;
27100 : ae_int_t nout;
27101 : ae_int_t offs;
27102 : double f;
27103 : double df;
27104 : double d2f;
27105 : double v;
27106 : ae_bool bflag;
27107 : ae_int_t istart;
27108 : ae_int_t entrysize;
27109 : ae_int_t entryoffs;
27110 : ae_int_t neuronidx;
27111 : ae_int_t srcentryoffs;
27112 : ae_int_t srcneuronidx;
27113 : ae_int_t srcweightidx;
27114 : ae_int_t neurontype;
27115 : ae_int_t nweights;
27116 : ae_int_t offs0;
27117 : double v0;
27118 : double v1;
27119 : double v2;
27120 : double v3;
27121 : double s0;
27122 : double s1;
27123 : double s2;
27124 : double s3;
27125 : ae_int_t chunksize;
27126 :
27127 :
27128 0 : chunksize = 4;
27129 0 : ae_assert(csize<=chunksize, "MLPChunkedProcess: internal error (CSize>ChunkSize)", _state);
27130 :
27131 : /*
27132 : * Try to use HPC core, if possible
27133 : */
27134 0 : if( hpcchunkedprocess(&network->weights, &network->structinfo, &network->columnmeans, &network->columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, _state) )
27135 : {
27136 0 : return;
27137 : }
27138 :
27139 : /*
27140 : * Read network geometry, prepare data
27141 : */
27142 0 : nin = network->structinfo.ptr.p_int[1];
27143 0 : nout = network->structinfo.ptr.p_int[2];
27144 0 : ntotal = network->structinfo.ptr.p_int[3];
27145 0 : istart = network->structinfo.ptr.p_int[5];
27146 0 : entrysize = 4;
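           :
           :     /*
           :      * Each entry is only 4 elements wide here: a pure processing pass
           :      * needs neuron outputs for the 4-sample chunk, but no slots for
           :      * dF/dNET or dError/dF.
           :      */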
27147 :
27148 : /*
27149 : * Fill Batch4Buf by zeros.
27150 : *
27151 : * THIS STAGE IS VERY IMPORTANT!
27152 : *
27154 27153 :      * We fill all components of each entry (here just the 4 neuron values).
27154 : * It allows us to easily handle situations when CSize<ChunkSize by
27155 : * simply working with ALL components of Batch4Buf, without ever
27156 : * looking at CSize.
27157 : */
27158 0 : for(i=0; i<=entrysize*ntotal-1; i++)
27159 : {
27160 0 : batch4buf->ptr.p_double[i] = (double)(0);
27161 : }
27162 :
27163 : /*
27164 : * Forward pass:
27165 : * 1. Load data into Batch4Buf. If CSize<ChunkSize, data are padded by zeros.
27166 : * 2. Perform forward pass through network
27167 : */
27168 0 : for(i=0; i<=nin-1; i++)
27169 : {
27170 0 : entryoffs = entrysize*i;
27171 0 : for(j=0; j<=csize-1; j++)
27172 : {
27173 0 : if( ae_fp_neq(network->columnsigmas.ptr.p_double[i],(double)(0)) )
27174 : {
27175 0 : batch4buf->ptr.p_double[entryoffs+j] = (xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i])/network->columnsigmas.ptr.p_double[i];
27176 : }
27177 : else
27178 : {
27179 0 : batch4buf->ptr.p_double[entryoffs+j] = xy->ptr.pp_double[cstart+j][i]-network->columnmeans.ptr.p_double[i];
27180 : }
27181 : }
27182 : }
27183 0 : for(neuronidx=0; neuronidx<=ntotal-1; neuronidx++)
27184 : {
27185 0 : entryoffs = entrysize*neuronidx;
27186 0 : offs = istart+neuronidx*mlpbase_nfieldwidth;
27187 0 : neurontype = network->structinfo.ptr.p_int[offs+0];
27188 0 : if( neurontype>0||neurontype==-5 )
27189 : {
27190 :
27191 : /*
27192 : * "activation function" neuron, which takes value of neuron SrcNeuronIdx
27193 : * and applies activation function to it.
27194 : *
27195 : * This neuron has no weights and no tunable parameters.
27196 : */
27197 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
27198 0 : srcentryoffs = entrysize*srcneuronidx;
27199 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+0], neurontype, &f, &df, &d2f, _state);
27200 0 : batch4buf->ptr.p_double[entryoffs+0] = f;
27201 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+1], neurontype, &f, &df, &d2f, _state);
27202 0 : batch4buf->ptr.p_double[entryoffs+1] = f;
27203 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+2], neurontype, &f, &df, &d2f, _state);
27204 0 : batch4buf->ptr.p_double[entryoffs+2] = f;
27205 0 : mlpactivationfunction(batch4buf->ptr.p_double[srcentryoffs+3], neurontype, &f, &df, &d2f, _state);
27206 0 : batch4buf->ptr.p_double[entryoffs+3] = f;
27207 0 : continue;
27208 : }
27209 0 : if( neurontype==0 )
27210 : {
27211 :
27212 : /*
27213 : * "adaptive summator" neuron, whose output is a weighted sum of inputs.
27214 : * It has weights, but has no activation function.
27215 : */
27216 0 : nweights = network->structinfo.ptr.p_int[offs+1];
27217 0 : srcneuronidx = network->structinfo.ptr.p_int[offs+2];
27218 0 : srcentryoffs = entrysize*srcneuronidx;
27219 0 : srcweightidx = network->structinfo.ptr.p_int[offs+3];
27220 0 : v0 = (double)(0);
27221 0 : v1 = (double)(0);
27222 0 : v2 = (double)(0);
27223 0 : v3 = (double)(0);
27224 0 : for(j=0; j<=nweights-1; j++)
27225 : {
27226 0 : v = network->weights.ptr.p_double[srcweightidx];
27227 0 : srcweightidx = srcweightidx+1;
27228 0 : v0 = v0+v*batch4buf->ptr.p_double[srcentryoffs+0];
27229 0 : v1 = v1+v*batch4buf->ptr.p_double[srcentryoffs+1];
27230 0 : v2 = v2+v*batch4buf->ptr.p_double[srcentryoffs+2];
27231 0 : v3 = v3+v*batch4buf->ptr.p_double[srcentryoffs+3];
27232 0 : srcentryoffs = srcentryoffs+entrysize;
27233 : }
27234 0 : batch4buf->ptr.p_double[entryoffs+0] = v0;
27235 0 : batch4buf->ptr.p_double[entryoffs+1] = v1;
27236 0 : batch4buf->ptr.p_double[entryoffs+2] = v2;
27237 0 : batch4buf->ptr.p_double[entryoffs+3] = v3;
27238 0 : continue;
27239 : }
27240 0 : if( neurontype<0 )
27241 : {
27242 0 : bflag = ae_false;
27243 0 : if( neurontype==-2 )
27244 : {
27245 :
27246 : /*
27247 : * Input neuron, left unchanged
27248 : */
27249 0 : bflag = ae_true;
27250 : }
27251 0 : if( neurontype==-3 )
27252 : {
27253 :
27254 : /*
27255 : * "-1" neuron
27256 : */
27257 0 : batch4buf->ptr.p_double[entryoffs+0] = (double)(-1);
27258 0 : batch4buf->ptr.p_double[entryoffs+1] = (double)(-1);
27259 0 : batch4buf->ptr.p_double[entryoffs+2] = (double)(-1);
27260 0 : batch4buf->ptr.p_double[entryoffs+3] = (double)(-1);
27261 0 : bflag = ae_true;
27262 : }
27263 0 : if( neurontype==-4 )
27264 : {
27265 :
27266 : /*
27267 : * "0" neuron
27268 : */
27269 0 : batch4buf->ptr.p_double[entryoffs+0] = (double)(0);
27270 0 : batch4buf->ptr.p_double[entryoffs+1] = (double)(0);
27271 0 : batch4buf->ptr.p_double[entryoffs+2] = (double)(0);
27272 0 : batch4buf->ptr.p_double[entryoffs+3] = (double)(0);
27273 0 : bflag = ae_true;
27274 : }
27275 0 : ae_assert(bflag, "MLPChunkedProcess: internal error - unknown neuron type!", _state);
27276 0 : continue;
27277 : }
27278 : }
27279 :
27280 : /*
27281 : * SOFTMAX normalization or scaling.
27282 : */
27283 0 : ae_assert(network->structinfo.ptr.p_int[6]==0||network->structinfo.ptr.p_int[6]==1, "MLPChunkedProcess: unknown normalization type!", _state);
27284 0 : if( network->structinfo.ptr.p_int[6]==1 )
27285 : {
27286 :
27287 : /*
27288 : * SOFTMAX-normalized network.
27289 : *
27290 : * First, calculate (V0,V1,V2,V3) - component-wise maximum
27291 : * of output neurons. This vector of maximum values will be
27292 : * used for normalization of outputs prior to calculating
27293 : * exponentials.
27294 : *
27295 : * NOTE: the only purpose of this stage is to prevent overflow
27296 : * during calculation of exponentials. With this stage
27297 : * we make sure that all exponentials are calculated
27298 : * with non-positive argument. If you load (0,0,0,0) to
27299 : * (V0,V1,V2,V3), your program will continue working -
27300 : * although with less robustness.
27301 : */
27302 0 : entryoffs = entrysize*(ntotal-nout);
27303 0 : v0 = batch4buf->ptr.p_double[entryoffs+0];
27304 0 : v1 = batch4buf->ptr.p_double[entryoffs+1];
27305 0 : v2 = batch4buf->ptr.p_double[entryoffs+2];
27306 0 : v3 = batch4buf->ptr.p_double[entryoffs+3];
27307 0 : entryoffs = entryoffs+entrysize;
27308 0 : for(i=1; i<=nout-1; i++)
27309 : {
27310 0 : v = batch4buf->ptr.p_double[entryoffs+0];
27311 0 : if( v>v0 )
27312 : {
27313 0 : v0 = v;
27314 : }
27315 0 : v = batch4buf->ptr.p_double[entryoffs+1];
27316 0 : if( v>v1 )
27317 : {
27318 0 : v1 = v;
27319 : }
27320 0 : v = batch4buf->ptr.p_double[entryoffs+2];
27321 0 : if( v>v2 )
27322 : {
27323 0 : v2 = v;
27324 : }
27325 0 : v = batch4buf->ptr.p_double[entryoffs+3];
27326 0 : if( v>v3 )
27327 : {
27328 0 : v3 = v;
27329 : }
27330 0 : entryoffs = entryoffs+entrysize;
27331 : }
27332 :
27333 : /*
27334 : * Then, calculate exponentials and place them to part of the
27335 : * array which is located past the last entry. We also
27336 : * calculate sum of exponentials.
27337 : */
27338 0 : entryoffs = entrysize*(ntotal-nout);
27339 0 : offs0 = entrysize*ntotal;
27340 0 : s0 = (double)(0);
27341 0 : s1 = (double)(0);
27342 0 : s2 = (double)(0);
27343 0 : s3 = (double)(0);
27344 0 : for(i=0; i<=nout-1; i++)
27345 : {
27346 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+0]-v0, _state);
27347 0 : s0 = s0+v;
27348 0 : batch4buf->ptr.p_double[offs0+0] = v;
27349 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+1]-v1, _state);
27350 0 : s1 = s1+v;
27351 0 : batch4buf->ptr.p_double[offs0+1] = v;
27352 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+2]-v2, _state);
27353 0 : s2 = s2+v;
27354 0 : batch4buf->ptr.p_double[offs0+2] = v;
27355 0 : v = ae_exp(batch4buf->ptr.p_double[entryoffs+3]-v3, _state);
27356 0 : s3 = s3+v;
27357 0 : batch4buf->ptr.p_double[offs0+3] = v;
27358 0 : entryoffs = entryoffs+entrysize;
27359 0 : offs0 = offs0+chunksize;
27360 : }
27361 :
27362 : /*
27363 : * Write SOFTMAX-normalized values to the output array.
27364 : */
27365 0 : offs0 = entrysize*ntotal;
27366 0 : for(i=0; i<=nout-1; i++)
27367 : {
27368 0 : if( csize>0 )
27369 : {
27370 0 : xy->ptr.pp_double[cstart+0][nin+i] = batch4buf->ptr.p_double[offs0+0]/s0;
27371 : }
27372 0 : if( csize>1 )
27373 : {
27374 0 : xy->ptr.pp_double[cstart+1][nin+i] = batch4buf->ptr.p_double[offs0+1]/s1;
27375 : }
27376 0 : if( csize>2 )
27377 : {
27378 0 : xy->ptr.pp_double[cstart+2][nin+i] = batch4buf->ptr.p_double[offs0+2]/s2;
27379 : }
27380 0 : if( csize>3 )
27381 : {
27382 0 : xy->ptr.pp_double[cstart+3][nin+i] = batch4buf->ptr.p_double[offs0+3]/s3;
27383 : }
27384 0 : offs0 = offs0+chunksize;
27385 : }
27386 : }
27387 : else
27388 : {
27389 :
27390 : /*
27392 27391 :      * Regression network: write de-standardized outputs.
27393 27392 :      *
27394 27393 :      * For each of the last NOut neurons:
27395 27394 :      * * multiply the neuron's output by ColumnSigmas[NIn+I]
27396 27395 :      * * add ColumnMeans[NIn+I]
27397 27396 :      * * write the result to the output columns of XY
27398 27397 :      *
27399 27398 :      * No error-related quantities are computed in this processing pass.
27399 : */
27400 0 : for(i=0; i<=nout-1; i++)
27401 : {
27402 0 : v0 = network->columnsigmas.ptr.p_double[nin+i];
27403 0 : v1 = network->columnmeans.ptr.p_double[nin+i];
27404 0 : entryoffs = entrysize*(ntotal-nout+i);
27405 0 : for(j=0; j<=csize-1; j++)
27406 : {
27407 0 : xy->ptr.pp_double[cstart+j][nin+i] = batch4buf->ptr.p_double[entryoffs+j]*v0+v1;
27408 : }
27409 : }
27410 : }
27411 : }
27412 :
27413 :
27414 : /*************************************************************************
27415 : Returns T*Ln(T/Z), guarded against overflow/underflow.
27416 : Internal subroutine.
27417 : *************************************************************************/
27418 0 : static double mlpbase_safecrossentropy(double t,
27419 : double z,
27420 : ae_state *_state)
27421 : {
27422 : double r;
27423 : double result;
27424 :
27425 :
27426 0 : if( ae_fp_eq(t,(double)(0)) )
27427 : {
27428 0 : result = (double)(0);
27429 : }
27430 : else
27431 : {
27432 0 : if( ae_fp_greater(ae_fabs(z, _state),(double)(1)) )
27433 : {
27434 :
27435 : /*
27436 : * Shouldn't be the case with softmax,
27437 : * but we just want to be sure.
27438 : */
27439 0 : if( ae_fp_eq(t/z,(double)(0)) )
27440 : {
27441 0 : r = ae_minrealnumber;
27442 : }
27443 : else
27444 : {
27445 0 : r = t/z;
27446 : }
27447 : }
27448 : else
27449 : {
27450 :
27451 : /*
27452 : * Normal case
27453 : */
27454 0 : if( ae_fp_eq(z,(double)(0))||ae_fp_greater_eq(ae_fabs(t, _state),ae_maxrealnumber*ae_fabs(z, _state)) )
27455 : {
27456 0 : r = ae_maxrealnumber;
27457 : }
27458 : else
27459 : {
27460 0 : r = t/z;
27461 : }
27462 : }
27463 0 : result = t*ae_log(r, _state);
27464 : }
27465 0 : return result;
27466 : }
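           :
           :
           : /*
           :  * Behavior sketch for the guard above: SafeCrossEntropy(0,Z)=0 (the limit
           :  * of T*Ln(T/Z) as T->0); a ratio T/Z which would underflow is replaced by
           :  * MinRealNumber, and one which would overflow (Z=0 or |T|>=MaxRealNumber*|Z|)
           :  * is replaced by MaxRealNumber, so the logarithm always receives a finite
           :  * argument.
           :  */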
27467 :
27468 :
27469 : /*************************************************************************
27471 27470 : This function performs the backward pass of neural network randomization:
27472 27471 : * it assumes that Network.Weights stores standard deviations of weights
27473 27472 :   (weights are not generated yet, only their deviations are present)
27474 27473 : * it sets deviations of weights which feed the NeuronIdx-th neuron to the specified value
27475 27474 : * it recursively descends to deeper neurons and modifies their weights
27476 27475 : * it stops after encountering nonlinear neurons, linear activation functions,
27477 27476 :   input neurons, "0" and "-1" neurons
27477 :
27478 : -- ALGLIB --
27479 : Copyright 27.06.2013 by Bochkanov Sergey
27480 : *************************************************************************/
27481 0 : static void mlpbase_randomizebackwardpass(multilayerperceptron* network,
27482 : ae_int_t neuronidx,
27483 : double v,
27484 : ae_state *_state)
27485 : {
27486 : ae_int_t istart;
27487 : ae_int_t neurontype;
27488 : ae_int_t n1;
27489 : ae_int_t n2;
27490 : ae_int_t w1;
27491 : ae_int_t w2;
27492 : ae_int_t offs;
27493 : ae_int_t i;
27494 :
27495 :
27496 0 : istart = network->structinfo.ptr.p_int[5];
27497 0 : neurontype = network->structinfo.ptr.p_int[istart+neuronidx*mlpbase_nfieldwidth+0];
27498 0 : if( neurontype==-2 )
27499 : {
27500 :
27501 : /*
27502 : * Input neuron - stop
27503 : */
27504 0 : return;
27505 : }
27506 0 : if( neurontype==-3 )
27507 : {
27508 :
27509 : /*
27510 : * "-1" neuron: stop
27511 : */
27512 0 : return;
27513 : }
27514 0 : if( neurontype==-4 )
27515 : {
27516 :
27517 : /*
27518 : * "0" neuron: stop
27519 : */
27520 0 : return;
27521 : }
27522 0 : if( neurontype==0 )
27523 : {
27524 :
27525 : /*
27526 : * Adaptive summator neuron:
27527 : * * modify deviations of its weights
27528 : * * recursively call this function for its inputs
27529 : */
27530 0 : offs = istart+neuronidx*mlpbase_nfieldwidth;
27531 0 : n1 = network->structinfo.ptr.p_int[offs+2];
27532 0 : n2 = n1+network->structinfo.ptr.p_int[offs+1]-1;
27533 0 : w1 = network->structinfo.ptr.p_int[offs+3];
27534 0 : w2 = w1+network->structinfo.ptr.p_int[offs+1]-1;
27535 0 : for(i=w1; i<=w2; i++)
27536 : {
27537 0 : network->weights.ptr.p_double[i] = v;
27538 : }
27539 0 : for(i=n1; i<=n2; i++)
27540 : {
27541 0 : mlpbase_randomizebackwardpass(network, i, v, _state);
27542 : }
27543 0 : return;
27544 : }
27545 0 : if( neurontype==-5 )
27546 : {
27547 :
27548 : /*
27549 : * Linear activation function: stop
27550 : */
27551 0 : return;
27552 : }
27553 0 : if( neurontype>0 )
27554 : {
27555 :
27556 : /*
27557 : * Nonlinear activation function: stop
27558 : */
27559 0 : return;
27560 : }
27561 0 : ae_assert(ae_false, "RandomizeBackwardPass: unexpected neuron type", _state);
27562 : }
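           :
           :
           : /*
           :  * Recursion sketch: starting from a given neuron, the pass overwrites the
           :  * deviations of all weights feeding each adaptive summator it reaches and
           :  * then descends into that summator's inputs; activation neurons, input
           :  * neurons and the "0"/"-1" constant neurons terminate the descent.
           :  */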
27563 :
27564 :
27565 0 : void _modelerrors_init(void* _p, ae_state *_state, ae_bool make_automatic)
27566 : {
27567 0 : modelerrors *p = (modelerrors*)_p;
27568 0 : ae_touch_ptr((void*)p);
27569 0 : }
27570 :
27571 :
27572 0 : void _modelerrors_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
27573 : {
27574 0 : modelerrors *dst = (modelerrors*)_dst;
27575 0 : modelerrors *src = (modelerrors*)_src;
27576 0 : dst->relclserror = src->relclserror;
27577 0 : dst->avgce = src->avgce;
27578 0 : dst->rmserror = src->rmserror;
27579 0 : dst->avgerror = src->avgerror;
27580 0 : dst->avgrelerror = src->avgrelerror;
27581 0 : }
27582 :
27583 :
27584 0 : void _modelerrors_clear(void* _p)
27585 : {
27586 0 : modelerrors *p = (modelerrors*)_p;
27587 0 : ae_touch_ptr((void*)p);
27588 0 : }
27589 :
27590 :
27591 0 : void _modelerrors_destroy(void* _p)
27592 : {
27593 0 : modelerrors *p = (modelerrors*)_p;
27594 0 : ae_touch_ptr((void*)p);
27595 0 : }
27596 :
27597 :
27598 0 : void _smlpgrad_init(void* _p, ae_state *_state, ae_bool make_automatic)
27599 : {
27600 0 : smlpgrad *p = (smlpgrad*)_p;
27601 0 : ae_touch_ptr((void*)p);
27602 0 : ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic);
27603 0 : }
27604 :
27605 :
27606 0 : void _smlpgrad_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
27607 : {
27608 0 : smlpgrad *dst = (smlpgrad*)_dst;
27609 0 : smlpgrad *src = (smlpgrad*)_src;
27610 0 : dst->f = src->f;
27611 0 : ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic);
27612 0 : }
27613 :
27614 :
27615 0 : void _smlpgrad_clear(void* _p)
27616 : {
27617 0 : smlpgrad *p = (smlpgrad*)_p;
27618 0 : ae_touch_ptr((void*)p);
27619 0 : ae_vector_clear(&p->g);
27620 0 : }
27621 :
27622 :
27623 0 : void _smlpgrad_destroy(void* _p)
27624 : {
27625 0 : smlpgrad *p = (smlpgrad*)_p;
27626 0 : ae_touch_ptr((void*)p);
27627 0 : ae_vector_destroy(&p->g);
27628 0 : }
27629 :
27630 :
27631 0 : void _multilayerperceptron_init(void* _p, ae_state *_state, ae_bool make_automatic)
27632 : {
27633 0 : multilayerperceptron *p = (multilayerperceptron*)_p;
27634 0 : ae_touch_ptr((void*)p);
27635 0 : ae_vector_init(&p->hllayersizes, 0, DT_INT, _state, make_automatic);
27636 0 : ae_vector_init(&p->hlconnections, 0, DT_INT, _state, make_automatic);
27637 0 : ae_vector_init(&p->hlneurons, 0, DT_INT, _state, make_automatic);
27638 0 : ae_vector_init(&p->structinfo, 0, DT_INT, _state, make_automatic);
27639 0 : ae_vector_init(&p->weights, 0, DT_REAL, _state, make_automatic);
27640 0 : ae_vector_init(&p->columnmeans, 0, DT_REAL, _state, make_automatic);
27641 0 : ae_vector_init(&p->columnsigmas, 0, DT_REAL, _state, make_automatic);
27642 0 : ae_vector_init(&p->neurons, 0, DT_REAL, _state, make_automatic);
27643 0 : ae_vector_init(&p->dfdnet, 0, DT_REAL, _state, make_automatic);
27644 0 : ae_vector_init(&p->derror, 0, DT_REAL, _state, make_automatic);
27645 0 : ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
27646 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
27647 0 : ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
27648 0 : ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
27649 0 : ae_vector_init(&p->nwbuf, 0, DT_REAL, _state, make_automatic);
27650 0 : ae_vector_init(&p->integerbuf, 0, DT_INT, _state, make_automatic);
27651 0 : _modelerrors_init(&p->err, _state, make_automatic);
27652 0 : ae_vector_init(&p->rndbuf, 0, DT_REAL, _state, make_automatic);
27653 0 : ae_shared_pool_init(&p->buf, _state, make_automatic);
27654 0 : ae_shared_pool_init(&p->gradbuf, _state, make_automatic);
27655 0 : ae_matrix_init(&p->dummydxy, 0, 0, DT_REAL, _state, make_automatic);
27656 0 : _sparsematrix_init(&p->dummysxy, _state, make_automatic);
27657 0 : ae_vector_init(&p->dummyidx, 0, DT_INT, _state, make_automatic);
27658 0 : ae_shared_pool_init(&p->dummypool, _state, make_automatic);
27659 0 : }
27660 :
27661 :
27662 0 : void _multilayerperceptron_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
27663 : {
27664 0 : multilayerperceptron *dst = (multilayerperceptron*)_dst;
27665 0 : multilayerperceptron *src = (multilayerperceptron*)_src;
27666 0 : dst->hlnetworktype = src->hlnetworktype;
27667 0 : dst->hlnormtype = src->hlnormtype;
27668 0 : ae_vector_init_copy(&dst->hllayersizes, &src->hllayersizes, _state, make_automatic);
27669 0 : ae_vector_init_copy(&dst->hlconnections, &src->hlconnections, _state, make_automatic);
27670 0 : ae_vector_init_copy(&dst->hlneurons, &src->hlneurons, _state, make_automatic);
27671 0 : ae_vector_init_copy(&dst->structinfo, &src->structinfo, _state, make_automatic);
27672 0 : ae_vector_init_copy(&dst->weights, &src->weights, _state, make_automatic);
27673 0 : ae_vector_init_copy(&dst->columnmeans, &src->columnmeans, _state, make_automatic);
27674 0 : ae_vector_init_copy(&dst->columnsigmas, &src->columnsigmas, _state, make_automatic);
27675 0 : ae_vector_init_copy(&dst->neurons, &src->neurons, _state, make_automatic);
27676 0 : ae_vector_init_copy(&dst->dfdnet, &src->dfdnet, _state, make_automatic);
27677 0 : ae_vector_init_copy(&dst->derror, &src->derror, _state, make_automatic);
27678 0 : ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
27679 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
27680 0 : ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
27681 0 : ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
27682 0 : ae_vector_init_copy(&dst->nwbuf, &src->nwbuf, _state, make_automatic);
27683 0 : ae_vector_init_copy(&dst->integerbuf, &src->integerbuf, _state, make_automatic);
27684 0 : _modelerrors_init_copy(&dst->err, &src->err, _state, make_automatic);
27685 0 : ae_vector_init_copy(&dst->rndbuf, &src->rndbuf, _state, make_automatic);
27686 0 : ae_shared_pool_init_copy(&dst->buf, &src->buf, _state, make_automatic);
27687 0 : ae_shared_pool_init_copy(&dst->gradbuf, &src->gradbuf, _state, make_automatic);
27688 0 : ae_matrix_init_copy(&dst->dummydxy, &src->dummydxy, _state, make_automatic);
27689 0 : _sparsematrix_init_copy(&dst->dummysxy, &src->dummysxy, _state, make_automatic);
27690 0 : ae_vector_init_copy(&dst->dummyidx, &src->dummyidx, _state, make_automatic);
27691 0 : ae_shared_pool_init_copy(&dst->dummypool, &src->dummypool, _state, make_automatic);
27692 0 : }
27693 :
27694 :
27695 0 : void _multilayerperceptron_clear(void* _p)
27696 : {
27697 0 : multilayerperceptron *p = (multilayerperceptron*)_p;
27698 0 : ae_touch_ptr((void*)p);
27699 0 : ae_vector_clear(&p->hllayersizes);
27700 0 : ae_vector_clear(&p->hlconnections);
27701 0 : ae_vector_clear(&p->hlneurons);
27702 0 : ae_vector_clear(&p->structinfo);
27703 0 : ae_vector_clear(&p->weights);
27704 0 : ae_vector_clear(&p->columnmeans);
27705 0 : ae_vector_clear(&p->columnsigmas);
27706 0 : ae_vector_clear(&p->neurons);
27707 0 : ae_vector_clear(&p->dfdnet);
27708 0 : ae_vector_clear(&p->derror);
27709 0 : ae_vector_clear(&p->x);
27710 0 : ae_vector_clear(&p->y);
27711 0 : ae_matrix_clear(&p->xy);
27712 0 : ae_vector_clear(&p->xyrow);
27713 0 : ae_vector_clear(&p->nwbuf);
27714 0 : ae_vector_clear(&p->integerbuf);
27715 0 : _modelerrors_clear(&p->err);
27716 0 : ae_vector_clear(&p->rndbuf);
27717 0 : ae_shared_pool_clear(&p->buf);
27718 0 : ae_shared_pool_clear(&p->gradbuf);
27719 0 : ae_matrix_clear(&p->dummydxy);
27720 0 : _sparsematrix_clear(&p->dummysxy);
27721 0 : ae_vector_clear(&p->dummyidx);
27722 0 : ae_shared_pool_clear(&p->dummypool);
27723 0 : }
27724 :
27725 :
27726 0 : void _multilayerperceptron_destroy(void* _p)
27727 : {
27728 0 : multilayerperceptron *p = (multilayerperceptron*)_p;
27729 0 : ae_touch_ptr((void*)p);
27730 0 : ae_vector_destroy(&p->hllayersizes);
27731 0 : ae_vector_destroy(&p->hlconnections);
27732 0 : ae_vector_destroy(&p->hlneurons);
27733 0 : ae_vector_destroy(&p->structinfo);
27734 0 : ae_vector_destroy(&p->weights);
27735 0 : ae_vector_destroy(&p->columnmeans);
27736 0 : ae_vector_destroy(&p->columnsigmas);
27737 0 : ae_vector_destroy(&p->neurons);
27738 0 : ae_vector_destroy(&p->dfdnet);
27739 0 : ae_vector_destroy(&p->derror);
27740 0 : ae_vector_destroy(&p->x);
27741 0 : ae_vector_destroy(&p->y);
27742 0 : ae_matrix_destroy(&p->xy);
27743 0 : ae_vector_destroy(&p->xyrow);
27744 0 : ae_vector_destroy(&p->nwbuf);
27745 0 : ae_vector_destroy(&p->integerbuf);
27746 0 : _modelerrors_destroy(&p->err);
27747 0 : ae_vector_destroy(&p->rndbuf);
27748 0 : ae_shared_pool_destroy(&p->buf);
27749 0 : ae_shared_pool_destroy(&p->gradbuf);
27750 0 : ae_matrix_destroy(&p->dummydxy);
27751 0 : _sparsematrix_destroy(&p->dummysxy);
27752 0 : ae_vector_destroy(&p->dummyidx);
27753 0 : ae_shared_pool_destroy(&p->dummypool);
27754 0 : }
27755 :
27756 :
27757 : #endif
27758 : #if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
27759 :
27760 :
27761 : /*************************************************************************
27762 : Multiclass Fisher LDA
27763 :
27765 27764 : Subroutine finds coefficients of the linear combination which optimally
27766 27765 : separates the training set into classes.
27766 :
27767 : COMMERCIAL EDITION OF ALGLIB:
27768 :
27769 : ! Commercial version of ALGLIB includes two important improvements of
27770 : ! this function, which can be used from C++ and C#:
27771 : ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
27772 : ! * multithreading support
27773 : !
27774 : ! Intel MKL gives approximately constant (with respect to number of
27775 : ! worker threads) acceleration factor which depends on CPU being used,
27776 : ! problem size and "baseline" ALGLIB edition which is used for
27777 : ! comparison. Best results are achieved for high-dimensional problems
27778 : ! (NVars is at least 256).
27779 : !
27780 : ! Multithreading is used to accelerate initial phase of LDA, which
27781 : ! includes calculation of products of large matrices. Again, for best
27782 : ! efficiency problem must be high-dimensional.
27783 : !
27784 : ! Generally, commercial ALGLIB is several times faster than open-source
27785 : ! generic C edition, and many times faster than open-source C# edition.
27786 : !
27787 : ! We recommend you to read 'Working with commercial version' section of
27788 : ! ALGLIB Reference Manual in order to find out how to use performance-
27789 : ! related features provided by commercial edition of ALGLIB.
27790 :
27791 : INPUT PARAMETERS:
27792 : XY - training set, array[0..NPoints-1,0..NVars].
27793 : First NVars columns store values of independent
27795 27794 :                 variables, the next column stores the class index (from 0
27796 27795 :                 to NClasses-1) to which the dataset element belongs. Fractional
27797 27796 :                 values are rounded to the nearest integer.
27797 : NPoints - training set size, NPoints>=0
27798 : NVars - number of independent variables, NVars>=1
27799 : NClasses - number of classes, NClasses>=2
27800 :
27801 :
27802 : OUTPUT PARAMETERS:
27803 : Info - return code:
27804 : * -4, if internal EVD subroutine hasn't converged
27805 : * -2, if there is a point with class number
27806 : outside of [0..NClasses-1].
27808 27807 :                 * -1, if incorrect parameters were passed (NPoints<0,
27808 : NVars<1, NClasses<2)
27809 : * 1, if task has been solved
27811 27810 :                 *  2, if there was multicollinearity in the training set,
27812 27811 :                     but the task has been solved.
27812 : W - linear combination coefficients, array[0..NVars-1]
27813 :
27814 : -- ALGLIB --
27815 : Copyright 31.05.2008 by Bochkanov Sergey
27816 : *************************************************************************/
27817 0 : void fisherlda(/* Real */ ae_matrix* xy,
27818 : ae_int_t npoints,
27819 : ae_int_t nvars,
27820 : ae_int_t nclasses,
27821 : ae_int_t* info,
27822 : /* Real */ ae_vector* w,
27823 : ae_state *_state)
27824 : {
27825 : ae_frame _frame_block;
27826 : ae_matrix w2;
27827 :
27828 0 : ae_frame_make(_state, &_frame_block);
27829 0 : memset(&w2, 0, sizeof(w2));
27830 0 : *info = 0;
27831 0 : ae_vector_clear(w);
27832 0 : ae_matrix_init(&w2, 0, 0, DT_REAL, _state, ae_true);
27833 :
27834 0 : fisherldan(xy, npoints, nvars, nclasses, info, &w2, _state);
27835 0 : if( *info>0 )
27836 : {
27837 0 : ae_vector_set_length(w, nvars, _state);
27838 0 : ae_v_move(&w->ptr.p_double[0], 1, &w2.ptr.pp_double[0][0], w2.stride, ae_v_len(0,nvars-1));
27839 : }
27840 0 : ae_frame_leave(_state);
27841 0 : }
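           :
           :
           : /*
           :  * Usage sketch via the public C++ wrapper declared in dataanalysis.h
           :  * (dataset values are illustrative only):
           :  *
           :  *     #include "dataanalysis.h"
           :  *     using namespace alglib;
           :  *
           :  *     real_2d_array xy = "[[1.0,0.1,0],[1.2,0.3,0],[7.9,8.2,1],[8.1,7.8,1]]";
           :  *     ae_int_t info;
           :  *     real_1d_array w;
           :  *     fisherlda(xy, 4, 2, 2, info, w);  // 4 points, 2 vars, 2 classes
           :  *     // Info>0 on success; W[0..1] then holds the separating direction.
           :  */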
27842 :
27843 :
27844 : /*************************************************************************
27845 : N-dimensional multiclass Fisher LDA
27846 :
27848 27847 : Subroutine finds coefficients of linear combinations which optimally separate
27849 27848 : the training set into classes. It returns an N-dimensional basis whose vectors
27850 27849 : are sorted by quality of training set separation (in descending order).
27850 :
27851 : ! COMMERCIAL EDITION OF ALGLIB:
27852 : !
27853 : ! Commercial Edition of ALGLIB includes following important improvements
27854 : ! of this function:
27855 : ! * high-performance native backend with same C# interface (C# version)
27856 : ! * multithreading support (C++ and C# versions)
27857 : ! * hardware vendor (Intel) implementations of linear algebra primitives
27858 : ! (C++ and C# versions, x86/x64 platform)
27859 : !
27860 : ! We recommend you to read 'Working with commercial version' section of
27861 : ! ALGLIB Reference Manual in order to find out how to use performance-
27862 : ! related features provided by commercial edition of ALGLIB.
27863 :
27864 : INPUT PARAMETERS:
27865 : XY - training set, array[0..NPoints-1,0..NVars].
27866 : First NVars columns store values of independent
27868 27867 :                 variables, the next column stores the class index (from 0
27869 27868 :                 to NClasses-1) to which the dataset element belongs. Fractional
27870 27869 :                 values are rounded to the nearest integer.
27870 : NPoints - training set size, NPoints>=0
27871 : NVars - number of independent variables, NVars>=1
27872 : NClasses - number of classes, NClasses>=2
27873 :
27874 :
27875 : OUTPUT PARAMETERS:
27876 : Info - return code:
27877 : * -4, if internal EVD subroutine hasn't converged
27878 : * -2, if there is a point with class number
27879 : outside of [0..NClasses-1].
27881 27880 :                 * -1, if incorrect parameters were passed (NPoints<0,
27881 : NVars<1, NClasses<2)
27882 : * 1, if task has been solved
27884 27883 :                 *  2, if there was multicollinearity in the training set,
27885 27884 :                     but the task has been solved.
27885 : W - basis, array[0..NVars-1,0..NVars-1]
27886 : columns of matrix stores basis vectors, sorted by
27888 27887 :                 columns of the matrix store basis vectors, sorted by
27888 :
27889 : -- ALGLIB --
27890 : Copyright 31.05.2008 by Bochkanov Sergey
27891 : *************************************************************************/
27892 0 : void fisherldan(/* Real */ ae_matrix* xy,
27893 : ae_int_t npoints,
27894 : ae_int_t nvars,
27895 : ae_int_t nclasses,
27896 : ae_int_t* info,
27897 : /* Real */ ae_matrix* w,
27898 : ae_state *_state)
27899 : {
27900 : ae_frame _frame_block;
27901 : ae_int_t i;
27902 : ae_int_t j;
27903 : ae_int_t k;
27904 : ae_int_t m;
27905 : double v;
27906 : ae_vector c;
27907 : ae_vector mu;
27908 : ae_matrix muc;
27909 : ae_vector nc;
27910 : ae_matrix sw;
27911 : ae_matrix st;
27912 : ae_matrix z;
27913 : ae_matrix z2;
27914 : ae_matrix tm;
27915 : ae_matrix sbroot;
27916 : ae_matrix a;
27917 : ae_matrix xyc;
27918 : ae_matrix xyproj;
27919 : ae_matrix wproj;
27920 : ae_vector tf;
27921 : ae_vector d;
27922 : ae_vector d2;
27923 : ae_vector work;
27924 :
27925 0 : ae_frame_make(_state, &_frame_block);
27926 0 : memset(&c, 0, sizeof(c));
27927 0 : memset(&mu, 0, sizeof(mu));
27928 0 : memset(&muc, 0, sizeof(muc));
27929 0 : memset(&nc, 0, sizeof(nc));
27930 0 : memset(&sw, 0, sizeof(sw));
27931 0 : memset(&st, 0, sizeof(st));
27932 0 : memset(&z, 0, sizeof(z));
27933 0 : memset(&z2, 0, sizeof(z2));
27934 0 : memset(&tm, 0, sizeof(tm));
27935 0 : memset(&sbroot, 0, sizeof(sbroot));
27936 0 : memset(&a, 0, sizeof(a));
27937 0 : memset(&xyc, 0, sizeof(xyc));
27938 0 : memset(&xyproj, 0, sizeof(xyproj));
27939 0 : memset(&wproj, 0, sizeof(wproj));
27940 0 : memset(&tf, 0, sizeof(tf));
27941 0 : memset(&d, 0, sizeof(d));
27942 0 : memset(&d2, 0, sizeof(d2));
27943 0 : memset(&work, 0, sizeof(work));
27944 0 : *info = 0;
27945 0 : ae_matrix_clear(w);
27946 0 : ae_vector_init(&c, 0, DT_INT, _state, ae_true);
27947 0 : ae_vector_init(&mu, 0, DT_REAL, _state, ae_true);
27948 0 : ae_matrix_init(&muc, 0, 0, DT_REAL, _state, ae_true);
27949 0 : ae_vector_init(&nc, 0, DT_INT, _state, ae_true);
27950 0 : ae_matrix_init(&sw, 0, 0, DT_REAL, _state, ae_true);
27951 0 : ae_matrix_init(&st, 0, 0, DT_REAL, _state, ae_true);
27952 0 : ae_matrix_init(&z, 0, 0, DT_REAL, _state, ae_true);
27953 0 : ae_matrix_init(&z2, 0, 0, DT_REAL, _state, ae_true);
27954 0 : ae_matrix_init(&tm, 0, 0, DT_REAL, _state, ae_true);
27955 0 : ae_matrix_init(&sbroot, 0, 0, DT_REAL, _state, ae_true);
27956 0 : ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
27957 0 : ae_matrix_init(&xyc, 0, 0, DT_REAL, _state, ae_true);
27958 0 : ae_matrix_init(&xyproj, 0, 0, DT_REAL, _state, ae_true);
27959 0 : ae_matrix_init(&wproj, 0, 0, DT_REAL, _state, ae_true);
27960 0 : ae_vector_init(&tf, 0, DT_REAL, _state, ae_true);
27961 0 : ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
27962 0 : ae_vector_init(&d2, 0, DT_REAL, _state, ae_true);
27963 0 : ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
27964 :
27965 :
27966 : /*
27967 : * Test data
27968 : */
27969 0 : if( (npoints<0||nvars<1)||nclasses<2 )
27970 : {
27971 0 : *info = -1;
27972 0 : ae_frame_leave(_state);
27973 0 : return;
27974 : }
27975 0 : for(i=0; i<=npoints-1; i++)
27976 : {
27977 0 : if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
27978 : {
27979 0 : *info = -2;
27980 0 : ae_frame_leave(_state);
27981 0 : return;
27982 : }
27983 : }
27984 0 : *info = 1;
27985 :
27986 : /*
27987 : * Special case: NPoints<=1
27988 : * Degenerate task.
27989 : */
27990 0 : if( npoints<=1 )
27991 : {
27992 0 : *info = 2;
27993 0 : ae_matrix_set_length(w, nvars, nvars, _state);
27994 0 : for(i=0; i<=nvars-1; i++)
27995 : {
27996 0 : for(j=0; j<=nvars-1; j++)
27997 : {
27998 0 : if( i==j )
27999 : {
28000 0 : w->ptr.pp_double[i][j] = (double)(1);
28001 : }
28002 : else
28003 : {
28004 0 : w->ptr.pp_double[i][j] = (double)(0);
28005 : }
28006 : }
28007 : }
28008 0 : ae_frame_leave(_state);
28009 0 : return;
28010 : }
28011 :
28012 : /*
28013 : * Prepare temporaries
28014 : */
28015 0 : ae_vector_set_length(&tf, nvars, _state);
28016 0 : ae_vector_set_length(&work, ae_maxint(nvars, npoints, _state)+1, _state);
28017 0 : ae_matrix_set_length(&xyc, npoints, nvars, _state);
28018 :
28019 : /*
28020 : * Convert class labels from reals to integers (just for convenience)
28021 : */
28022 0 : ae_vector_set_length(&c, npoints, _state);
28023 0 : for(i=0; i<=npoints-1; i++)
28024 : {
28025 0 : c.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
28026 : }
28027 :
28028 : /*
28029 : * Calculate class sizes, class means
28030 : */
28031 0 : ae_vector_set_length(&mu, nvars, _state);
28032 0 : ae_matrix_set_length(&muc, nclasses, nvars, _state);
28033 0 : ae_vector_set_length(&nc, nclasses, _state);
28034 0 : for(j=0; j<=nvars-1; j++)
28035 : {
28036 0 : mu.ptr.p_double[j] = (double)(0);
28037 : }
28038 0 : for(i=0; i<=nclasses-1; i++)
28039 : {
28040 0 : nc.ptr.p_int[i] = 0;
28041 0 : for(j=0; j<=nvars-1; j++)
28042 : {
28043 0 : muc.ptr.pp_double[i][j] = (double)(0);
28044 : }
28045 : }
28046 0 : for(i=0; i<=npoints-1; i++)
28047 : {
28048 0 : ae_v_add(&mu.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
28049 0 : ae_v_add(&muc.ptr.pp_double[c.ptr.p_int[i]][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
28050 0 : nc.ptr.p_int[c.ptr.p_int[i]] = nc.ptr.p_int[c.ptr.p_int[i]]+1;
28051 : }
28052 0 : for(i=0; i<=nclasses-1; i++)
28053 : {
28054 0 : v = (double)1/(double)nc.ptr.p_int[i];
28055 0 : ae_v_muld(&muc.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), v);
28056 : }
28057 0 : v = (double)1/(double)npoints;
28058 0 : ae_v_muld(&mu.ptr.p_double[0], 1, ae_v_len(0,nvars-1), v);
28059 :
28060 : /*
28061 : * Create ST matrix
28062 : */
28063 0 : ae_matrix_set_length(&st, nvars, nvars, _state);
28064 0 : for(i=0; i<=nvars-1; i++)
28065 : {
28066 0 : for(j=0; j<=nvars-1; j++)
28067 : {
28068 0 : st.ptr.pp_double[i][j] = (double)(0);
28069 : }
28070 : }
28071 0 : for(k=0; k<=npoints-1; k++)
28072 : {
28073 0 : ae_v_move(&xyc.ptr.pp_double[k][0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nvars-1));
28074 0 : ae_v_sub(&xyc.ptr.pp_double[k][0], 1, &mu.ptr.p_double[0], 1, ae_v_len(0,nvars-1));
28075 : }
28076 0 : rmatrixgemm(nvars, nvars, npoints, 1.0, &xyc, 0, 0, 1, &xyc, 0, 0, 0, 0.0, &st, 0, 0, _state);
28077 :
28078 : /*
28079 : * Create SW matrix
28080 : */
28081 0 : ae_matrix_set_length(&sw, nvars, nvars, _state);
28082 0 : for(i=0; i<=nvars-1; i++)
28083 : {
28084 0 : for(j=0; j<=nvars-1; j++)
28085 : {
28086 0 : sw.ptr.pp_double[i][j] = (double)(0);
28087 : }
28088 : }
28089 0 : for(k=0; k<=npoints-1; k++)
28090 : {
28091 0 : ae_v_move(&xyc.ptr.pp_double[k][0], 1, &xy->ptr.pp_double[k][0], 1, ae_v_len(0,nvars-1));
28092 0 : ae_v_sub(&xyc.ptr.pp_double[k][0], 1, &muc.ptr.pp_double[c.ptr.p_int[k]][0], 1, ae_v_len(0,nvars-1));
28093 : }
28094 0 : rmatrixgemm(nvars, nvars, npoints, 1.0, &xyc, 0, 0, 1, &xyc, 0, 0, 0, 0.0, &sw, 0, 0, _state);
28095 :
28096 : /*
28097 : * Maximize ratio J=(w'*ST*w)/(w'*SW*w).
28098 : *
28099 : * First, make transition from w to v such that w'*ST*w becomes v'*v:
28100 : * v = root(ST)*w = R*w
28101 : * R = root(D)*Z'
28102 : * w = (root(ST)^-1)*v = RI*v
28103 : * RI = Z*inv(root(D))
28104 : * J = (v'*v)/(v'*(RI'*SW*RI)*v)
28105 : * ST = Z*D*Z'
28106 : *
28107 : * so we have
28108 : *
28109 : * J = (v'*v) / (v'*(inv(root(D))*Z'*SW*Z*inv(root(D)))*v) =
28110 : * = (v'*v) / (v'*A*v)
28111 : */
28112 0 : if( !smatrixevd(&st, nvars, 1, ae_true, &d, &z, _state) )
28113 : {
28114 0 : *info = -4;
28115 0 : ae_frame_leave(_state);
28116 0 : return;
28117 : }
28118 0 : ae_matrix_set_length(w, nvars, nvars, _state);
28119 0 : if( ae_fp_less_eq(d.ptr.p_double[nvars-1],(double)(0))||ae_fp_less_eq(d.ptr.p_double[0],1000*ae_machineepsilon*d.ptr.p_double[nvars-1]) )
28120 : {
28121 :
28122 : /*
28123 : * Special case: D[NVars-1]<=0
28124 : * Degenerate task (all variables takes the same value).
28125 : */
28126 0 : if( ae_fp_less_eq(d.ptr.p_double[nvars-1],(double)(0)) )
28127 : {
28128 0 : *info = 2;
28129 0 : for(i=0; i<=nvars-1; i++)
28130 : {
28131 0 : for(j=0; j<=nvars-1; j++)
28132 : {
28133 0 : if( i==j )
28134 : {
28135 0 : w->ptr.pp_double[i][j] = (double)(1);
28136 : }
28137 : else
28138 : {
28139 0 : w->ptr.pp_double[i][j] = (double)(0);
28140 : }
28141 : }
28142 : }
28143 0 : ae_frame_leave(_state);
28144 0 : return;
28145 : }
28146 :
28147 : /*
28148 : * Special case: degenerate ST matrix, multicollinearity found.
28149 : * Since we know ST eigenvalues/vectors we can translate task to
28150 : * non-degenerate form.
28151 : *
28152 : * Let WG be an orthogonal basis of the non-zero variance subspace
28153 : * of ST and let WZ be an orthogonal basis of the zero variance
28154 : * subspace.
28155 : *
28156 : * Projection onto WG allows us to use LDA on the reduced M-dimensional
28157 : * subspace; the N-M vectors of WZ allow us to extend the reduced LDA
28158 : * factors to the full N-dimensional space.
28159 : */
28160 0 : m = 0;
28161 0 : for(k=0; k<=nvars-1; k++)
28162 : {
28163 0 : if( ae_fp_less_eq(d.ptr.p_double[k],1000*ae_machineepsilon*d.ptr.p_double[nvars-1]) )
28164 : {
28165 0 : m = k+1;
28166 : }
28167 : }
28168 0 : ae_assert(m!=0, "FisherLDAN: internal error #1", _state);
28169 0 : ae_matrix_set_length(&xyproj, npoints, nvars-m+1, _state);
28170 0 : rmatrixgemm(npoints, nvars-m, nvars, 1.0, xy, 0, 0, 0, &z, 0, m, 0, 0.0, &xyproj, 0, 0, _state);
28171 0 : for(i=0; i<=npoints-1; i++)
28172 : {
28173 0 : xyproj.ptr.pp_double[i][nvars-m] = xy->ptr.pp_double[i][nvars];
28174 : }
28175 0 : fisherldan(&xyproj, npoints, nvars-m, nclasses, info, &wproj, _state);
28176 0 : if( *info<0 )
28177 : {
28178 0 : ae_frame_leave(_state);
28179 0 : return;
28180 : }
28181 0 : rmatrixgemm(nvars, nvars-m, nvars-m, 1.0, &z, 0, m, 0, &wproj, 0, 0, 0, 0.0, w, 0, 0, _state);
28182 0 : for(k=nvars-m; k<=nvars-1; k++)
28183 : {
28184 0 : ae_v_move(&w->ptr.pp_double[0][k], w->stride, &z.ptr.pp_double[0][k-(nvars-m)], z.stride, ae_v_len(0,nvars-1));
28185 : }
28186 0 : *info = 2;
28187 : }
28188 : else
28189 : {
28190 :
28191 : /*
28192 : * General case: no multicollinearity
28193 : */
28194 0 : ae_matrix_set_length(&tm, nvars, nvars, _state);
28195 0 : ae_matrix_set_length(&a, nvars, nvars, _state);
28196 0 : rmatrixgemm(nvars, nvars, nvars, 1.0, &sw, 0, 0, 0, &z, 0, 0, 0, 0.0, &tm, 0, 0, _state);
28197 0 : rmatrixgemm(nvars, nvars, nvars, 1.0, &z, 0, 0, 1, &tm, 0, 0, 0, 0.0, &a, 0, 0, _state);
28198 0 : for(i=0; i<=nvars-1; i++)
28199 : {
28200 0 : for(j=0; j<=nvars-1; j++)
28201 : {
28202 0 : a.ptr.pp_double[i][j] = a.ptr.pp_double[i][j]/ae_sqrt(d.ptr.p_double[i]*d.ptr.p_double[j], _state);
28203 : }
28204 : }
28205 0 : if( !smatrixevd(&a, nvars, 1, ae_true, &d2, &z2, _state) )
28206 : {
28207 0 : *info = -4;
28208 0 : ae_frame_leave(_state);
28209 0 : return;
28210 : }
28211 0 : for(i=0; i<=nvars-1; i++)
28212 : {
28213 0 : for(k=0; k<=nvars-1; k++)
28214 : {
28215 0 : z2.ptr.pp_double[i][k] = z2.ptr.pp_double[i][k]/ae_sqrt(d.ptr.p_double[i], _state);
28216 : }
28217 : }
28218 0 : rmatrixgemm(nvars, nvars, nvars, 1.0, &z, 0, 0, 0, &z2, 0, 0, 0, 0.0, w, 0, 0, _state);
28219 : }
28220 :
28221 : /*
28222 : * Post-processing:
28223 : * * normalization
28224 : * * converting to non-negative form, if possible
28225 : */
28226 0 : for(k=0; k<=nvars-1; k++)
28227 : {
28228 0 : v = ae_v_dotproduct(&w->ptr.pp_double[0][k], w->stride, &w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1));
28229 0 : v = 1/ae_sqrt(v, _state);
28230 0 : ae_v_muld(&w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1), v);
28231 0 : v = (double)(0);
28232 0 : for(i=0; i<=nvars-1; i++)
28233 : {
28234 0 : v = v+w->ptr.pp_double[i][k];
28235 : }
28236 0 : if( ae_fp_less(v,(double)(0)) )
28237 : {
28238 0 : ae_v_muld(&w->ptr.pp_double[0][k], w->stride, ae_v_len(0,nvars-1), -1);
28239 : }
28240 : }
28241 0 : ae_frame_leave(_state);
28242 : }
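/*************************************************************************
Usage sketch for the function above, via the public ALGLIB C++ wrapper of
the same name (a minimal illustration, assuming the standard
alglib::real_2d_array and alglib::ae_int_t wrapper types declared in
dataanalysis.h; data values are made up for the example):

    // 4 points, NVars=1 independent variable, NClasses=2;
    // the last column stores the class label (0 or 1)
    alglib::real_2d_array xy = "[[1,0],[2,0],[5,1],[6,1]]";
    alglib::ae_int_t info;
    alglib::real_2d_array w;
    alglib::fisherldan(xy, 4, 1, 2, info, w);
    // info=1 (or 2 in case of multicollinearity) indicates success;
    // columns of w store basis vectors sorted by separation quality
*************************************************************************/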
28243 :
28244 :
28245 : #endif
28246 : #if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
28247 :
28248 :
28249 : /*************************************************************************
28250 : This function creates SSA model object. Right after creation the model is
28251 : in "dummy" mode - you can add data, but analysis/prediction will return
28252 : just zeros (it assumes that the basis is empty).
28253 :
28254 : HOW TO USE SSA MODEL:
28255 :
28256 : 1. create model with ssacreate()
28257 : 2. add data with one/many ssaaddsequence() calls
28258 : 3. choose SSA algorithm with one of ssasetalgo...() functions:
28259 : * ssasetalgotopkdirect() for direct one-run analysis
28260 : * ssasetalgotopkrealtime() for algorithm optimized for many subsequent
28261 : runs with warm-start capabilities
28262 : * ssasetalgoprecomputed() for user-supplied basis
28263 : 4. set window width with ssasetwindow()
28264 : 5. perform one of the analysis-related activities:
28265 : a) call ssagetbasis() to get basis
28266 : b) call ssaanalyzelast(), ssaanalyzesequence() or ssaanalyzelastwindow()
28267 : to perform analysis (trend/noise separation)
28268 : c) call one of the forecasting functions (ssaforecastlast() or
28269 : ssaforecastsequence()) to perform prediction; alternatively, you can
28270 : extract linear recurrence coefficients with ssagetlrr().
28271 : SSA analysis will be performed during the first call to an analysis-
28272 : related function. The SSA model is smart enough to track all changes in
28273 : the dataset and model settings, to cache the previously computed basis
28274 : and to re-evaluate it only when necessary.
28275 :
28276 : Additionally, if your setting involves a constant stream of incoming
28277 : data, you can quickly update an already calculated model with one of the
28278 : incremental append-and-update functions: ssaappendpointandupdate() or
28279 : ssaappendsequenceandupdate().
28280 :
28281 : NOTE: steps (2), (3), (4) can be performed in arbitrary order.
28282 :
28283 : INPUT PARAMETERS:
28284 : none
28285 :
28286 : OUTPUT PARAMETERS:
28287 : S - structure which stores model state
28288 :
28289 : -- ALGLIB --
28290 : Copyright 30.10.2017 by Bochkanov Sergey
28291 : *************************************************************************/
28292 0 : void ssacreate(ssamodel* s, ae_state *_state)
28293 : {
28294 :
28295 0 : _ssamodel_clear(s);
28296 :
28297 :
28298 : /*
28299 : * Model data, algorithms and settings
28300 : */
28301 0 : s->nsequences = 0;
28302 0 : ae_vector_set_length(&s->sequenceidx, 1, _state);
28303 0 : s->sequenceidx.ptr.p_int[0] = 0;
28304 0 : s->algotype = 0;
28305 0 : s->windowwidth = 1;
28306 0 : s->rtpowerup = 1;
28307 0 : s->arebasisandsolvervalid = ae_false;
28308 0 : s->rngseed = 1;
28309 0 : s->defaultsubspaceits = 10;
28310 0 : s->memorylimit = 50000000;
28311 :
28312 : /*
28313 : * Debug counters
28314 : */
28315 0 : s->dbgcntevd = 0;
28316 0 : }
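/*************************************************************************
Usage sketch: the numbered workflow above, expressed through the public
ALGLIB C++ wrappers (a minimal illustration, assuming the standard
alglib::ssamodel and alglib::real_1d_array types; data values are made up):

    alglib::ssamodel s;
    alglib::real_1d_array x = "[0,0.5,1,1,1.5,2]";
    alglib::ssacreate(s);                // step 1: create model
    alglib::ssaaddsequence(s, x);        // step 2: add data
    alglib::ssasetalgotopkdirect(s, 2);  // step 3: choose algorithm
    alglib::ssasetwindow(s, 3);          // step 4: set window width
    alglib::real_1d_array trend, noise;
    alglib::ae_int_t nticks;
    // step 5: first analysis call computes and caches the basis
    alglib::ssaanalyzelastwindow(s, trend, noise, nticks);
*************************************************************************/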
28317 :
28318 :
28319 : /*************************************************************************
28320 : This function sets window width for SSA model. You should call it before
28321 : the analysis phase. The default window width is 1 (not for real use).
28322 :
28323 : Special notes:
28324 : * this function call can be performed at any moment before first call to
28325 : analysis-related functions
28326 : * changing window width invalidates internally stored basis; if you change
28327 : window width AFTER you call analysis-related function, next analysis
28328 : phase will require re-calculation of the basis according to current
28329 : algorithm.
28330 : * calling this function with exactly same window width as current one has
28331 : no effect
28332 : * if you specify window width larger than any data sequence stored in the
28333 : model, analysis will return zero basis.
28334 :
28335 : INPUT PARAMETERS:
28336 : S - SSA model created with ssacreate()
28337 : WindowWidth - >=1, new window width
28338 :
28339 : OUTPUT PARAMETERS:
28340 : S - SSA model, updated
28341 :
28342 : -- ALGLIB --
28343 : Copyright 30.10.2017 by Bochkanov Sergey
28344 : *************************************************************************/
28345 0 : void ssasetwindow(ssamodel* s, ae_int_t windowwidth, ae_state *_state)
28346 : {
28347 :
28348 :
28349 0 : ae_assert(windowwidth>=1, "SSASetWindow: WindowWidth<1", _state);
28350 0 : if( windowwidth==s->windowwidth )
28351 : {
28352 0 : return;
28353 : }
28354 0 : s->windowwidth = windowwidth;
28355 0 : s->arebasisandsolvervalid = ae_false;
28356 : }
28357 :
28358 :
28359 : /*************************************************************************
28360 : This function sets seed which is used to initialize internal RNG when
28361 : we make pseudorandom decisions on model updates.
28362 :
28363 : By default, a deterministic seed is used - which results in the same
28364 : sequence of pseudorandom decisions every time you run the SSA model. If
28365 : you specify a non-deterministic seed value, then the SSA model may return
28366 : slightly different results after each run.
28367 :
28368 : This function can be useful when you have several SSA models updated with
28369 : ssaappendpointandupdate() called with 0<UpdateIts<1 (fractional value) and
28370 : due to performance limitations want them to perform updates at different
28371 : moments.
28372 :
28373 : INPUT PARAMETERS:
28374 : S - SSA model
28375 : Seed - seed:
28376 : * positive values = use deterministic seed for each run of
28377 : algorithms which depend on random initialization
28378 : * zero or negative values = use non-deterministic seed
28379 :
28380 : -- ALGLIB --
28381 : Copyright 03.11.2017 by Bochkanov Sergey
28382 : *************************************************************************/
28383 0 : void ssasetseed(ssamodel* s, ae_int_t seed, ae_state *_state)
28384 : {
28385 :
28386 :
28387 0 : s->rngseed = seed;
28388 0 : }
28389 :
28390 :
28391 : /*************************************************************************
28392 : This function sets length of power-up cycle for real-time algorithm.
28393 :
28394 : By default, this algorithm performs costly O(N*WindowWidth^2) init phase
28395 : followed by a full run of truncated EVD. However, if you are ready to
28396 : live with a slightly lower-quality basis during the first few iterations,
28397 : you can split this O(N*WindowWidth^2) initialization across several
28398 : subsequent append-and-update rounds, which improves the latency of the algorithm.
28399 :
28400 : This function invalidates basis/solver, next analysis call will result in
28401 : full recalculation of everything.
28402 :
28403 : INPUT PARAMETERS:
28404 : S - SSA model
28405 : PWLen - length of the power-up stage:
28406 : * 0 means that no power-up is requested
28407 : * 1 is the same as 0
28408 : * >1 means that delayed power-up is performed
28409 :
28410 : -- ALGLIB --
28411 : Copyright 03.11.2017 by Bochkanov Sergey
28412 : *************************************************************************/
28413 0 : void ssasetpoweruplength(ssamodel* s, ae_int_t pwlen, ae_state *_state)
28414 : {
28415 :
28416 :
28417 0 : ae_assert(pwlen>=0, "SSASetPowerUpLength: PWLen<0", _state);
28418 0 : s->rtpowerup = ae_maxint(pwlen, 1, _state);
28419 0 : s->arebasisandsolvervalid = ae_false;
28420 0 : }
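/*************************************************************************
Usage sketch: combining power-up with the real-time algorithm (a minimal
illustration, assuming the standard C++ wrappers; parameter values are
made up):

    alglib::ssamodel s;
    alglib::ssacreate(s);
    alglib::ssasetwindow(s, 20);
    alglib::ssasetalgotopkrealtime(s, 2);
    alglib::ssasetpoweruplength(s, 5); // spread the O(N*WindowWidth^2)
                                       // init across ~5 update rounds
    // ...add the initial dataset, then feed the stream with
    // alglib::ssaappendpointandupdate(s, x, 1.0);
*************************************************************************/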
28421 :
28422 :
28423 : /*************************************************************************
28424 : This function sets memory limit of SSA analysis.
28425 :
28426 : Straightforward SSA with sequence length T and window width W needs O(T*W)
28427 : memory. It is possible to reduce memory consumption by splitting task into
28428 : smaller chunks.
28429 :
28430 : This function allows you to specify an approximate memory limit (measured in
28431 : double precision numbers used for buffers). Actual memory consumption will
28432 : be comparable to the number specified by you.
28433 :
28434 : Default memory limit is 50,000,000 doubles (400 MB) in the current version.
28435 :
28436 : INPUT PARAMETERS:
28437 : S - SSA model
28438 : MemLimit- memory limit, >=0. Zero value means no limit.
28439 :
28440 : -- ALGLIB --
28441 : Copyright 20.12.2017 by Bochkanov Sergey
28442 : *************************************************************************/
28443 0 : void ssasetmemorylimit(ssamodel* s, ae_int_t memlimit, ae_state *_state)
28444 : {
28445 :
28446 :
28447 0 : if( memlimit<0 )
28448 : {
28449 0 : memlimit = 0;
28450 : }
28451 0 : s->memorylimit = memlimit;
28452 0 : }
28453 :
28454 :
28455 : /*************************************************************************
28456 : This function adds data sequence to SSA model. Only single-dimensional
28457 : sequences are supported.
28458 :
28459 : What is a sequence? The following definitions/requirements apply:
28460 : * a sequence is an array of values measured in subsequent, equally
28461 : separated time moments (ticks).
28462 : * you may have many sequences in your dataset; say, one sequence may
28463 : correspond to one trading session.
28464 : * sequence length should be larger than current window length (shorter
28465 : sequences will be ignored during analysis).
28466 : * analysis is performed within a sequence; different sequences are NOT
28467 : stacked together to produce one large contiguous stream of data.
28468 : * analysis is performed for all sequences at once, i.e. same set of basis
28469 : vectors is computed for all sequences
28470 :
28471 : INCREMENTAL ANALYSIS
28472 :
28473 : This function is not intended for incremental updates of previously found
28474 : SSA basis. Calling it invalidates all previous analysis results (basis is
28475 : reset and will be recalculated from scratch during next analysis).
28476 :
28477 : If you want to perform incremental/real-time SSA, consider using
28478 : following functions:
28479 : * ssaappendpointandupdate() for appending one point
28480 : * ssaappendsequenceandupdate() for appending new sequence
28481 :
28482 : INPUT PARAMETERS:
28483 : S - SSA model created with ssacreate()
28484 : X - array[N], data, can be larger (additional values
28485 : are ignored)
28486 : N - data length, can be automatically determined from
28487 : the array length. N>=0.
28488 :
28489 : OUTPUT PARAMETERS:
28490 : S - SSA model, updated
28491 :
28492 : NOTE: you can clear dataset with ssacleardata()
28493 :
28494 : -- ALGLIB --
28495 : Copyright 30.10.2017 by Bochkanov Sergey
28496 : *************************************************************************/
28497 0 : void ssaaddsequence(ssamodel* s,
28498 : /* Real */ ae_vector* x,
28499 : ae_int_t n,
28500 : ae_state *_state)
28501 : {
28502 : ae_int_t i;
28503 : ae_int_t offs;
28504 :
28505 :
28506 0 : ae_assert(n>=0, "SSAAddSequence: N<0", _state);
28507 0 : ae_assert(x->cnt>=n, "SSAAddSequence: X is too short", _state);
28508 0 : ae_assert(isfinitevector(x, n, _state), "SSAAddSequence: X contains infinities or NANs", _state);
28509 :
28510 : /*
28511 : * Invalidate model
28512 : */
28513 0 : s->arebasisandsolvervalid = ae_false;
28514 :
28515 : /*
28516 : * Add sequence
28517 : */
28518 0 : ivectorgrowto(&s->sequenceidx, s->nsequences+2, _state);
28519 0 : s->sequenceidx.ptr.p_int[s->nsequences+1] = s->sequenceidx.ptr.p_int[s->nsequences]+n;
28520 0 : rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences+1], _state);
28521 0 : offs = s->sequenceidx.ptr.p_int[s->nsequences];
28522 0 : for(i=0; i<=n-1; i++)
28523 : {
28524 0 : s->sequencedata.ptr.p_double[offs+i] = x->ptr.p_double[i];
28525 : }
28526 0 : inc(&s->nsequences, _state);
28527 0 : }
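/*************************************************************************
Usage sketch: adding several independent sequences - say, one per trading
session - before analysis (a minimal illustration, assuming the standard
C++ wrappers; data values are made up):

    alglib::ssamodel s;
    alglib::ssacreate(s);
    alglib::real_1d_array day1 = "[1.0,1.1,1.2,1.1,1.0]";
    alglib::real_1d_array day2 = "[0.9,1.0,1.1,1.0]";
    alglib::ssaaddsequence(s, day1); // sequences are NOT concatenated;
    alglib::ssaaddsequence(s, day2); // one basis is built for all of them
*************************************************************************/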
28528 :
28529 :
28530 : /*************************************************************************
28531 : This function appends single point to last data sequence stored in the SSA
28532 : model and tries to update model in the incremental manner (if possible
28533 : with current algorithm).
28534 :
28535 : If you want to add more than one point at once:
28536 : * if you want to add M points to the same sequence, perform M-1 calls with
28537 : UpdateIts parameter set to 0.0, and last call with non-zero UpdateIts.
28538 : * if you want to add new sequence, use ssaappendsequenceandupdate()
28539 :
28540 : Running time of this function does NOT depend on dataset size, only on
28541 : window width and number of singular vectors. Depending on algorithm being
28542 : used, incremental update has complexity:
28543 : * for top-K real time - O(UpdateIts*K*Width^2), with fractional UpdateIts
28544 : * for top-K direct - O(Width^3) for any non-zero UpdateIts
28545 : * for precomputed basis - O(1), no update is performed
28546 :
28547 : INPUT PARAMETERS:
28548 : S - SSA model created with ssacreate()
28549 : X - new point
28550 : UpdateIts - >=0, floating point (!) value, desired update
28551 : frequency:
28552 : * zero value means that point is stored, but no
28553 : update is performed
28554 : * integer part of the value means that specified
28555 : number of iterations is always performed
28556 : * fractional part of the value means that one
28557 : iteration is performed with this probability.
28558 :
28559 : Recommended value: 0<UpdateIts<=1. Values larger
28560 : than 1 are VERY seldom needed. If your dataset
28561 : changes slowly, you can set it to 0.1 and skip
28562 : 90% of updates.
28563 :
28564 : In any case, no information is lost even with zero
28565 : value of UpdateIts! It will be incorporated into
28566 : model, sooner or later.
28567 :
28568 : OUTPUT PARAMETERS:
28569 : S - SSA model, updated
28570 :
28571 : NOTE: this function uses internal RNG to handle fractional values of
28572 : UpdateIts. By default it is initialized with fixed seed during
28573 : initial calculation of basis. Thus subsequent calls to this function
28574 : will result in the same sequence of pseudorandom decisions.
28575 :
28576 : However, if you have several SSA models which are calculated
28577 : simultaneously, and if you want to reduce computational bottlenecks
28578 : by performing random updates at random moments, then fixed seed is
28579 : not an option - all updates will fire at same moments.
28580 :
28581 : You may change it with ssasetseed() function.
28582 :
28583 : NOTE: this function throws an exception if called for empty dataset (there
28584 : is no "last" sequence to modify).
28585 :
28586 : -- ALGLIB --
28587 : Copyright 30.10.2017 by Bochkanov Sergey
28588 : *************************************************************************/
28589 0 : void ssaappendpointandupdate(ssamodel* s,
28590 : double x,
28591 : double updateits,
28592 : ae_state *_state)
28593 : {
28594 :
28595 :
28596 0 : ae_assert(ae_isfinite(x, _state), "SSAAppendPointAndUpdate: X is not finite", _state);
28597 0 : ae_assert(ae_isfinite(updateits, _state), "SSAAppendPointAndUpdate: UpdateIts is not finite", _state);
28598 0 : ae_assert(ae_fp_greater_eq(updateits,(double)(0)), "SSAAppendPointAndUpdate: UpdateIts<0", _state);
28599 0 : ae_assert(s->nsequences>0, "SSAAppendPointAndUpdate: dataset is empty, no sequence to modify", _state);
28600 :
28601 : /*
28602 : * Append point to dataset
28603 : */
28604 0 : rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]+1, _state);
28605 0 : s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]] = x;
28606 0 : s->sequenceidx.ptr.p_int[s->nsequences] = s->sequenceidx.ptr.p_int[s->nsequences]+1;
28607 :
28608 : /*
28609 : * Do we have something to analyze? If no, invalidate basis
28610 : * (just to be sure) and exit.
28611 : */
28612 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
28613 : {
28614 0 : s->arebasisandsolvervalid = ae_false;
28615 0 : return;
28616 : }
28617 :
28618 : /*
28619 : * Well, we have data to analyze and algorithm set, but basis is
28620 : * invalid. Let's calculate it from scratch and exit.
28621 : */
28622 0 : if( !s->arebasisandsolvervalid )
28623 : {
28624 0 : ssa_updatebasis(s, 0, 0.0, _state);
28625 0 : return;
28626 : }
28627 :
28628 : /*
28629 : * Update already computed basis
28630 : */
28631 0 : ssa_updatebasis(s, 1, updateits, _state);
28632 : }
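/*************************************************************************
Usage sketch: appending a block of M points where only the last append
triggers an update, as recommended above (a minimal illustration; s is an
ssamodel prepared as in the previous examples, buf/m are hypothetical
names for the incoming block):

    for(int i=0; i<m-1; i++)
        alglib::ssaappendpointandupdate(s, buf[i], 0.0); // store only
    alglib::ssaappendpointandupdate(s, buf[m-1], 0.1);   // store, then
    // update with probability 0.1 - roughly one basis refresh per ten
    // appended blocks; nothing is lost by the skipped updates
*************************************************************************/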
28633 :
28634 :
28635 : /*************************************************************************
28636 : This function appends new sequence to dataset stored in the SSA model and
28637 : tries to update model in the incremental manner (if possible with current
28638 : algorithm).
28639 :
28640 : Notes:
28641 : * if you want to add M sequences at once, perform M-1 calls with UpdateIts
28642 : parameter set to 0.0, and last call with non-zero UpdateIts.
28643 : * if you want to add just one point, use ssaappendpointandupdate()
28644 :
28645 : Running time of this function does NOT depend on dataset size, only on
28646 : sequence length, window width and number of singular vectors. Depending on
28647 : algorithm being used, incremental update has complexity:
28648 : * for top-K real time - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
28649 : * for top-K direct - O(Width^3+(NTicks-Width)*Width^2)
28650 : * for precomputed basis - O(1), no update is performed
28651 :
28652 : INPUT PARAMETERS:
28653 : S - SSA model created with ssacreate()
28654 : X - new sequence, array[NTicks] or larger
28655 : NTicks - >=1, number of ticks in the sequence
28656 : UpdateIts - >=0, floating point (!) value, desired update
28657 : frequency:
28658 : * zero value means that point is stored, but no
28659 : update is performed
28660 : * integer part of the value means that specified
28661 : number of iterations is always performed
28662 : * fractional part of the value means that one
28663 : iteration is performed with this probability.
28664 :
28665 : Recommended value: 0<UpdateIts<=1. Values larger
28666 : than 1 are VERY seldom needed. If your dataset
28667 : changes slowly, you can set it to 0.1 and skip
28668 : 90% of updates.
28669 :
28670 : In any case, no information is lost even with zero
28671 : value of UpdateIts! It will be incorporated into
28672 : model, sooner or later.
28673 :
28674 : OUTPUT PARAMETERS:
28675 : S - SSA model, updated
28676 :
28677 : NOTE: this function uses internal RNG to handle fractional values of
28678 : UpdateIts. By default it is initialized with fixed seed during
28679 : initial calculation of basis. Thus subsequent calls to this function
28680 : will result in the same sequence of pseudorandom decisions.
28681 :
28682 : However, if you have several SSA models which are calculated
28683 : simultaneously, and if you want to reduce computational bottlenecks
28684 : by performing random updates at random moments, then fixed seed is
28685 : not an option - all updates will fire at same moments.
28686 :
28687 : You may change it with ssasetseed() function.
28688 :
28689 : -- ALGLIB --
28690 : Copyright 30.10.2017 by Bochkanov Sergey
28691 : *************************************************************************/
28692 0 : void ssaappendsequenceandupdate(ssamodel* s,
28693 : /* Real */ ae_vector* x,
28694 : ae_int_t nticks,
28695 : double updateits,
28696 : ae_state *_state)
28697 : {
28698 : ae_int_t i;
28699 : ae_int_t offs;
28700 :
28701 :
28702 0 : ae_assert(nticks>=0, "SSAAppendSequenceAndUpdate: NTicks<0", _state);
28703 0 : ae_assert(x->cnt>=nticks, "SSAAppendSequenceAndUpdate: X is too short", _state);
28704 0 : ae_assert(isfinitevector(x, nticks, _state), "SSAAppendSequenceAndUpdate: X contains infinities or NANs", _state);
28705 :
28706 : /*
28707 : * Add sequence
28708 : */
28709 0 : ivectorgrowto(&s->sequenceidx, s->nsequences+2, _state);
28710 0 : s->sequenceidx.ptr.p_int[s->nsequences+1] = s->sequenceidx.ptr.p_int[s->nsequences]+nticks;
28711 0 : rvectorgrowto(&s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences+1], _state);
28712 0 : offs = s->sequenceidx.ptr.p_int[s->nsequences];
28713 0 : for(i=0; i<=nticks-1; i++)
28714 : {
28715 0 : s->sequencedata.ptr.p_double[offs+i] = x->ptr.p_double[i];
28716 : }
28717 0 : inc(&s->nsequences, _state);
28718 :
28719 : /*
28720 : * Do we have something to analyze? If no, invalidate basis
28721 : * (just to be sure) and exit.
28722 : */
28723 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
28724 : {
28725 0 : s->arebasisandsolvervalid = ae_false;
28726 0 : return;
28727 : }
28728 :
28729 : /*
28730 : * Well, we have data to analyze and algorithm set, but basis is
28731 : * invalid. Let's calculate it from scratch and exit.
28732 : */
28733 0 : if( !s->arebasisandsolvervalid )
28734 : {
28735 0 : ssa_updatebasis(s, 0, 0.0, _state);
28736 0 : return;
28737 : }
28738 :
28739 : /*
28740 : * Update already computed basis
28741 : */
28742 0 : if( nticks>=s->windowwidth )
28743 : {
28744 0 : ssa_updatebasis(s, nticks-s->windowwidth+1, updateits, _state);
28745 : }
28746 : }
28747 :
28748 :
28749 : /*************************************************************************
28750 : This function sets SSA algorithm to "precomputed vectors" algorithm.
28751 :
28752 : This algorithm uses a precomputed set of orthonormal (orthogonal AND
28753 : normalized) basis vectors supplied by the user. Thus, the basis
28754 : calculation phase is not performed - we already have our basis - and only
28755 : the analysis/forecasting phase requires actual calculations.
28756 :
28757 : This algorithm may handle "append" requests which add just one/few ticks
28758 : to the end of the last sequence in O(1) time.
28759 :
28760 : NOTE: this algorithm accepts both basis and window width, because these
28761 : two parameters are naturally aligned. Calling this function sets
28762 : window width; if you call ssasetwindow() with other window width,
28763 : then during analysis stage algorithm will detect conflict and reset
28764 : to zero basis.
28765 :
28766 : INPUT PARAMETERS:
28767 : S - SSA model
28768 : A - array[WindowWidth,NBasis], orthonormalized basis;
28769 : this function does NOT control orthogonality and
28770 : does NOT perform any kind of renormalization. It
28771 : is your responsibility to provide it with correct
28772 : basis.
28773 : WindowWidth - window width, >=1
28774 : NBasis - number of basis vectors, 1<=NBasis<=WindowWidth
28775 :
28776 : OUTPUT PARAMETERS:
28777 : S - updated model
28778 :
28779 : NOTE: calling this function invalidates basis in all cases.
28780 :
28781 : -- ALGLIB --
28782 : Copyright 30.10.2017 by Bochkanov Sergey
28783 : *************************************************************************/
28784 0 : void ssasetalgoprecomputed(ssamodel* s,
28785 : /* Real */ ae_matrix* a,
28786 : ae_int_t windowwidth,
28787 : ae_int_t nbasis,
28788 : ae_state *_state)
28789 : {
28790 : ae_int_t i;
28791 : ae_int_t j;
28792 :
28793 :
28794 0 : ae_assert(windowwidth>=1, "SSASetAlgoPrecomputed: WindowWidth<1", _state);
28795 0 : ae_assert(nbasis>=1, "SSASetAlgoPrecomputed: NBasis<1", _state);
28796 0 : ae_assert(nbasis<=windowwidth, "SSASetAlgoPrecomputed: NBasis>WindowWidth", _state);
28797 0 : ae_assert(a->rows>=windowwidth, "SSASetAlgoPrecomputed: Rows(A)<WindowWidth", _state);
28798 0 : ae_assert(a->cols>=nbasis, "SSASetAlgoPrecomputed: Cols(A)<NBasis", _state);
28799 0 : ae_assert(apservisfinitematrix(a, windowwidth, nbasis, _state), "SSASetAlgoPrecomputed: A contains infinities or NANs", _state);
28800 0 : s->algotype = 1;
28801 0 : s->precomputedwidth = windowwidth;
28802 0 : s->precomputednbasis = nbasis;
28803 0 : s->windowwidth = windowwidth;
28804 0 : rmatrixsetlengthatleast(&s->precomputedbasis, windowwidth, nbasis, _state);
28805 0 : for(i=0; i<=windowwidth-1; i++)
28806 : {
28807 0 : for(j=0; j<=nbasis-1; j++)
28808 : {
28809 0 : s->precomputedbasis.ptr.pp_double[i][j] = a->ptr.pp_double[i][j];
28810 : }
28811 : }
28812 0 : s->arebasisandsolvervalid = ae_false;
28813 0 : }
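/*************************************************************************
Usage sketch: supplying a trivially orthonormal 2-vector basis for
WindowWidth=3 (a minimal illustration, assuming the standard C++ wrappers;
keeping the columns orthonormal is the caller's responsibility):

    alglib::real_2d_array basis = "[[1,0],[0,1],[0,0]]"; // 3x2, columns
    alglib::ssasetalgoprecomputed(s, basis, 3, 2);       // are orthonormal
    // window width is set to 3 as a side effect; no basis calculation
    // phase will be performed during analysis
*************************************************************************/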
28814 :
28815 :
28816 : /*************************************************************************
28817 : This function sets SSA algorithm to "direct top-K" algorithm.
28818 :
28819 : "Direct top-K" algorithm performs full SVD of the N*WINDOW trajectory
28820 : matrix (hence its name - direct solver is used), then extracts top K
28821 : components. Overall running time is O(N*WINDOW^2), where N is the number
28822 : of ticks in the dataset and WINDOW is the window width.
28823 :
28824 : This algorithm may handle "append" requests which add just one/few ticks
28825 : to the end of the last sequence in O(WINDOW^3) time, which is ~N/WINDOW
28826 : times faster than re-computing everything from scratch.
28827 :
28828 : INPUT PARAMETERS:
28829 : S - SSA model
28830 : TopK - number of components to analyze; TopK>=1.
28831 :
28832 : OUTPUT PARAMETERS:
28833 : S - updated model
28834 :
28835 :
28836 : NOTE: TopK values larger than WindowWidth are silently decreased to
28837 : WindowWidth during the analysis phase
28838 :
28839 : NOTE: calling this function invalidates basis, except for the situation
28840 : when this algorithm was already set with same parameters.
28841 :
28842 : -- ALGLIB --
28843 : Copyright 30.10.2017 by Bochkanov Sergey
28844 : *************************************************************************/
28845 0 : void ssasetalgotopkdirect(ssamodel* s, ae_int_t topk, ae_state *_state)
28846 : {
28847 :
28848 :
28849 0 : ae_assert(topk>=1, "SSASetAlgoTopKDirect: TopK<1", _state);
28850 :
28851 : /*
28852 : * Ignore calls which change nothing
28853 : */
28854 0 : if( s->algotype==2&&s->topk==topk )
28855 : {
28856 0 : return;
28857 : }
28858 :
28859 : /*
28860 : * Update settings, invalidate model
28861 : */
28862 0 : s->algotype = 2;
28863 0 : s->topk = topk;
28864 0 : s->arebasisandsolvervalid = ae_false;
28865 : }
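/*************************************************************************
Usage sketch: one-shot batch analysis with the direct solver (a minimal
illustration, assuming the standard C++ wrappers; parameter values are
made up):

    alglib::ssasetwindow(s, 10);
    alglib::ssasetalgotopkdirect(s, 3); // keep 3 largest components; the
                                        // full SVD runs on first analysis
*************************************************************************/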
28866 :
28867 :
28868 : /*************************************************************************
28869 : This function sets SSA algorithm to "top-K real time algorithm". This algo
28870 : extracts K components with largest singular values.
28871 :
28872 : It is a real-time version of the top-K algorithm which is optimized for
28873 : incremental processing and fast start-up. Internally it uses a subspace
28874 : eigensolver for truncated SVD, which makes it possible to perform quick
28875 : updates of the basis when only a few points/sequences are added to the dataset.
28876 :
28877 : Performance profile of the algorithm is given below:
28878 : * O(K*WindowWidth^2) running time for incremental update of the dataset
28879 : with one of the "append-and-update" functions (ssaappendpointandupdate()
28880 : or ssaappendsequenceandupdate()).
28881 : * O(N*WindowWidth^2) running time for initial basis evaluation (N=size of
28882 : dataset)
28883 : * ability to split costly initialization across several incremental
28884 : updates of the basis (so called "Power-Up" functionality, activated by
28885 : ssasetpoweruplength() function)
28886 :
28887 : INPUT PARAMETERS:
28888 : S - SSA model
28889 : TopK - number of components to analyze; TopK>=1.
28890 :
28891 : OUTPUT PARAMETERS:
28892 : S - updated model
28893 :
28894 : NOTE: this algorithm is optimized for large-scale tasks with large
28895 : datasets. On toy problems with just 5-10 points it can return basis
28896 : which is slightly different from that returned by direct algorithm
28897 : (ssasetalgotopkdirect() function). However, the difference becomes
28898 : negligible as dataset grows.
28899 :
28900 : NOTE: TopK values larger than WindowWidth are silently decreased to
28901 : WindowWidth during the analysis phase
28902 :
28903 : NOTE: calling this function invalidates basis, except for the situation
28904 : when this algorithm was already set with same parameters.
28905 :
28906 : -- ALGLIB --
28907 : Copyright 30.10.2017 by Bochkanov Sergey
28908 : *************************************************************************/
28909 0 : void ssasetalgotopkrealtime(ssamodel* s, ae_int_t topk, ae_state *_state)
28910 : {
28911 :
28912 :
28913 0 : ae_assert(topk>=1, "SSASetAlgoTopKRealTime: TopK<1", _state);
28914 :
28915 : /*
28916 : * Ignore calls which change nothing
28917 : */
28918 0 : if( s->algotype==3&&s->topk==topk )
28919 : {
28920 0 : return;
28921 : }
28922 :
28923 : /*
28924 : * Update settings, invalidate model
28925 : */
28926 0 : s->algotype = 3;
28927 0 : s->topk = topk;
28928 0 : s->arebasisandsolvervalid = ae_false;
28929 : }
28930 :
28931 :
28932 : /*************************************************************************
28933 : This function clears all data stored in the model and invalidates all
28934 : basis components found so far.
28935 :
28936 : INPUT PARAMETERS:
28937 : S - SSA model created with ssacreate()
28938 :
28939 : OUTPUT PARAMETERS:
28940 : S - SSA model, updated
28941 :
28942 : -- ALGLIB --
28943 : Copyright 30.10.2017 by Bochkanov Sergey
28944 : *************************************************************************/
28945 0 : void ssacleardata(ssamodel* s, ae_state *_state)
28946 : {
28947 :
28948 :
28949 0 : s->nsequences = 0;
28950 0 : s->arebasisandsolvervalid = ae_false;
28951 0 : }
28952 :
28953 :
28954 : /*************************************************************************
28955 : This function executes SSA on internally stored dataset and returns basis
28956 : found by current method.
28957 :
28958 : INPUT PARAMETERS:
28959 : S - SSA model
28960 :
28961 : OUTPUT PARAMETERS:
28962 : A - array[WindowWidth,NBasis], basis; vectors are
28963 : stored in matrix columns, by decreasing variance
28964 : SV - array[NBasis]:
28965 : * zeros - for model initialized with SSASetAlgoPrecomputed()
28966 : * singular values - for other algorithms
28967 : WindowWidth - current window
28968 : NBasis - basis size
28969 :
28970 :
28971 : CACHING/REUSE OF THE BASIS
28972 :
28973 : Caching/reuse of previous results is performed:
28974 : * first call performs full run of SSA; basis is stored in the cache
28975 : * subsequent calls reuse previously cached basis
28976 : * if you call any function which changes model properties (window length,
28977 : algorithm, dataset), internal basis will be invalidated.
28978 : * the only calls which do NOT invalidate basis are listed below:
28979 : a) ssasetwindow() with same window length
28980 : b) ssaappendpointandupdate()
28981 : c) ssaappendsequenceandupdate()
28982 : d) ssasetalgotopk...() with exactly same K
28983 : Calling these functions will result in reuse of previously found basis.
28984 :
28985 :
28986 : HANDLING OF DEGENERATE CASES
28987 :
28988 : Calling this function in degenerate cases (no data or all data are
28989 : shorter than window size; no algorithm is specified) returns basis with
28990 : just one zero vector.
28991 :
28992 : -- ALGLIB --
28993 : Copyright 30.10.2017 by Bochkanov Sergey
28994 : *************************************************************************/
28995 0 : void ssagetbasis(ssamodel* s,
28996 : /* Real */ ae_matrix* a,
28997 : /* Real */ ae_vector* sv,
28998 : ae_int_t* windowwidth,
28999 : ae_int_t* nbasis,
29000 : ae_state *_state)
29001 : {
29002 : ae_int_t i;
29003 :
29004 0 : ae_matrix_clear(a);
29005 0 : ae_vector_clear(sv);
29006 0 : *windowwidth = 0;
29007 0 : *nbasis = 0;
29008 :
29009 :
29010 : /*
29011 : * Is it degenerate case?
29012 : */
29013 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
29014 : {
29015 0 : *windowwidth = s->windowwidth;
29016 0 : *nbasis = 1;
29017 0 : ae_matrix_set_length(a, *windowwidth, 1, _state);
29018 0 : for(i=0; i<=*windowwidth-1; i++)
29019 : {
29020 0 : a->ptr.pp_double[i][0] = 0.0;
29021 : }
29022 0 : ae_vector_set_length(sv, 1, _state);
29023 0 : sv->ptr.p_double[0] = 0.0;
29024 0 : return;
29025 : }
29026 :
29027 : /*
29028 : * Update basis.
29029 : *
29030 : * It will take care of basis validity flags. AppendLen=0 which means
29031 : * that we perform initial basis evaluation.
29032 : */
29033 0 : ssa_updatebasis(s, 0, 0.0, _state);
29034 :
29035 : /*
29036 : * Output
29037 : */
29038 0 : ae_assert(s->nbasis>0, "SSAGetBasis: integrity check failed", _state);
29039 0 : ae_assert(s->windowwidth>0, "SSAGetBasis: integrity check failed", _state);
29040 0 : *nbasis = s->nbasis;
29041 0 : *windowwidth = s->windowwidth;
29042 0 : ae_matrix_set_length(a, *windowwidth, *nbasis, _state);
29043 0 : rmatrixcopy(*windowwidth, *nbasis, &s->basis, 0, 0, a, 0, 0, _state);
29044 0 : ae_vector_set_length(sv, *nbasis, _state);
29045 0 : for(i=0; i<=*nbasis-1; i++)
29046 : {
29047 0 : sv->ptr.p_double[i] = s->sv.ptr.p_double[i];
29048 : }
29049 : }
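/*************************************************************************
Usage sketch: retrieving the basis and inspecting singular values (a
minimal illustration, assuming the standard C++ wrappers):

    alglib::real_2d_array a;
    alglib::real_1d_array sv;
    alglib::ae_int_t w, nbasis;
    alglib::ssagetbasis(s, a, sv, w, nbasis);
    // a is w x nbasis; column j holds the j-th basis vector, and sv[j]
    // holds its singular value (zeros for a precomputed basis)
*************************************************************************/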
29050 :
29051 :
29052 : /*************************************************************************
29053 : This function returns linear recurrence relation (LRR) coefficients found
29054 : by current SSA algorithm.
29055 :
29056 : INPUT PARAMETERS:
29057 : S - SSA model
29058 :
29059 : OUTPUT PARAMETERS:
29060 : A - array[WindowWidth-1]. Coefficients of the
29061 : linear recurrence of the form:
29062 : X[W-1] = X[W-2]*A[W-2] + X[W-3]*A[W-3] + ... + X[0]*A[0].
29063 : Empty array for WindowWidth=1.
29064 : WindowWidth - current window width
29065 :
29066 :
29067 : CACHING/REUSE OF THE BASIS
29068 :
29069 : Caching/reuse of previous results is performed:
29070 : * first call performs full run of SSA; basis is stored in the cache
29071 : * subsequent calls reuse previously cached basis
29072 : * if you call any function which changes model properties (window length,
29073 : algorithm, dataset), internal basis will be invalidated.
29074 : * the only calls which do NOT invalidate basis are listed below:
29075 : a) ssasetwindow() with same window length
29076 : b) ssaappendpointandupdate()
29077 : c) ssaappendsequenceandupdate()
29078 : d) ssasetalgotopk...() with exactly same K
29079 : Calling these functions will result in reuse of previously found basis.
29080 :
29081 :
29082 : HANDLING OF DEGENERATE CASES
29083 :
29084 : Calling this function in degenerate cases (no data or all data are
29085 : shorter than window size; no algorithm is specified) returns zeros.
29086 :
29087 : -- ALGLIB --
29088 : Copyright 30.10.2017 by Bochkanov Sergey
29089 : *************************************************************************/
29090 0 : void ssagetlrr(ssamodel* s,
29091 : /* Real */ ae_vector* a,
29092 : ae_int_t* windowwidth,
29093 : ae_state *_state)
29094 : {
29095 : ae_int_t i;
29096 :
29097 0 : ae_vector_clear(a);
29098 0 : *windowwidth = 0;
29099 :
29100 0 : ae_assert(s->windowwidth>0, "SSAGetLRR: integrity check failed", _state);
29101 :
29102 : /*
29103 : * Is it degenerate case?
29104 : */
29105 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
29106 : {
29107 0 : *windowwidth = s->windowwidth;
29108 0 : ae_vector_set_length(a, *windowwidth-1, _state);
29109 0 : for(i=0; i<=*windowwidth-2; i++)
29110 : {
29111 0 : a->ptr.p_double[i] = 0.0;
29112 : }
29113 0 : return;
29114 : }
29115 :
29116 : /*
29117 : * Update basis.
29118 : *
29119 : * It will take care of basis validity flags. AppendLen=0 which means
29120 : * that we perform initial basis evaluation.
29121 : */
29122 0 : ssa_updatebasis(s, 0, 0.0, _state);
29123 :
29124 : /*
29125 : * Output
29126 : */
29127 0 : *windowwidth = s->windowwidth;
29128 0 : ae_vector_set_length(a, *windowwidth-1, _state);
29129 0 : for(i=0; i<=*windowwidth-2; i++)
29130 : {
29131 0 : a->ptr.p_double[i] = s->forecasta.ptr.p_double[i];
29132 : }
29133 : }
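/*************************************************************************
Usage sketch: applying the LRR coefficients manually, following the
recurrence X[W-1]=X[W-2]*A[W-2]+...+X[0]*A[0] quoted above (a minimal
illustration, assuming the standard C++ wrappers; xlast is a hypothetical
buffer holding the previous W-1 values, oldest first):

    alglib::real_1d_array a;
    alglib::ae_int_t w;
    alglib::ssagetlrr(s, a, w);
    double next = 0;
    for(int i=0; i<w-1; i++)
        next += xlast[i]*a[i]; // one-step continuation of the series
*************************************************************************/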
29134 :
29135 :
29136 : /*************************************************************************
29137 : This function executes SSA on internally stored dataset and returns
29138 : analysis for the last window of the last sequence. Such analysis is
29139 : an lightweight alternative for full scale reconstruction (see below).
29140 :
29141 : Typical use case for this function is real-time setting, when you are
29142 : interested in quick-and-dirty (very quick and very dirty) processing of
29143 : just a few last ticks of the trend.
29144 :
29145 : IMPORTANT: full scale SSA involves analysis of the ENTIRE dataset,
29146 : with reconstruction being done for all positions of sliding
29147 : window with subsequent hankelization (diagonal averaging) of
29148 : the resulting matrix.
29149 :
29150 : Such analysis requires O((DataLen-Window)*Window*NBasis) FLOPs
29151 : and can be quite costly. However, it has nice noise-canceling
29152 : effects due to averaging.
29153 :
29154 : This function performs REDUCED analysis of the last window. It
29155 : is much faster - just O(Window*NBasis), but its results are
29156 : DIFFERENT from those of ssaanalyzelast(). In particular, the first
29157 : few points of the trend are much more prone to noise.
29158 :
29159 : INPUT PARAMETERS:
29160 : S - SSA model
29161 :
29162 : OUTPUT PARAMETERS:
29163 : Trend - array[WindowSize], reconstructed trend line
29164 : Noise - array[WindowSize], the rest of the signal;
29165 : it holds that ActualData = Trend+Noise.
29166 : NTicks - current WindowSize
29167 :
29168 :
29169 : CACHING/REUSE OF THE BASIS
29170 :
29171 : Caching/reuse of previous results is performed:
29172 : * first call performs full run of SSA; basis is stored in the cache
29173 : * subsequent calls reuse previously cached basis
29174 : * if you call any function which changes model properties (window length,
29175 : algorithm, dataset), internal basis will be invalidated.
29176 : * the only calls which do NOT invalidate basis are listed below:
29177 : a) ssasetwindow() with same window length
29178 : b) ssaappendpointandupdate()
29179 : c) ssaappendsequenceandupdate()
29180 : d) ssasetalgotopk...() with exactly same K
29181 : Calling these functions will result in reuse of previously found basis.
29182 :
29183 : In any case, only basis is reused. Reconstruction is performed from
29184 : scratch every time you call this function.
29185 :
29186 :
29187 : HANDLING OF DEGENERATE CASES
29188 :
29189 : Following degenerate cases may happen:
29190 : * dataset is empty (no analysis can be done)
29191 : * all sequences are shorter than the window length, no analysis can be done
29192 : * no algorithm is specified (no analysis can be done)
29193 : * last sequence is shorter than the window length (analysis can be done,
29194 : but we can not perform reconstruction on the last sequence)
29195 :
29196 : Calling this function in degenerate cases returns following result:
29197 : * in any case, WindowWidth ticks are returned
29198 : * trend is assumed to be zero
29199 : * noise is initialized by the last sequence; if last sequence is shorter
29200 : than the window size, it is moved to the end of the array, and the
29201 : beginning of the noise array is filled by zeros
29202 :
29203 : No analysis is performed in degenerate cases (we immediately return dummy
29204 : values, no basis is constructed).
29205 :
29206 : -- ALGLIB --
29207 : Copyright 30.10.2017 by Bochkanov Sergey
29208 : *************************************************************************/
29209 0 : void ssaanalyzelastwindow(ssamodel* s,
29210 : /* Real */ ae_vector* trend,
29211 : /* Real */ ae_vector* noise,
29212 : ae_int_t* nticks,
29213 : ae_state *_state)
29214 : {
29215 : ae_int_t i;
29216 : ae_int_t offs;
29217 : ae_int_t cnt;
29218 :
29219 0 : ae_vector_clear(trend);
29220 0 : ae_vector_clear(noise);
29221 0 : *nticks = 0;
29222 :
29223 :
29224 : /*
29225 : * Init
29226 : */
29227 0 : *nticks = s->windowwidth;
29228 0 : ae_vector_set_length(trend, s->windowwidth, _state);
29229 0 : ae_vector_set_length(noise, s->windowwidth, _state);
29230 :
29231 : /*
29232 : * Is it degenerate case?
29233 : */
29234 0 : if( !ssa_hassomethingtoanalyze(s, _state)||!ssa_issequencebigenough(s, -1, _state) )
29235 : {
29236 0 : for(i=0; i<=*nticks-1; i++)
29237 : {
29238 0 : trend->ptr.p_double[i] = (double)(0);
29239 0 : noise->ptr.p_double[i] = (double)(0);
29240 : }
29241 0 : if( s->nsequences>=1 )
29242 : {
29243 0 : cnt = ae_minint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], *nticks, _state);
29244 0 : offs = s->sequenceidx.ptr.p_int[s->nsequences]-cnt;
29245 0 : for(i=0; i<=cnt-1; i++)
29246 : {
29247 0 : noise->ptr.p_double[*nticks-cnt+i] = s->sequencedata.ptr.p_double[offs+i];
29248 : }
29249 : }
29250 0 : return;
29251 : }
29252 :
29253 : /*
29254 : * Update basis.
29255 : *
29256 : * It will take care of basis validity flags. AppendLen=0 which means
29257 : * that we perform initial basis evaluation.
29258 : */
29259 0 : ssa_updatebasis(s, 0, 0.0, _state);
29260 :
29261 : /*
29262 : * Perform analysis of the last window
29263 : */
29264 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth>=0, "SSAAnalyzeLastWindow: integrity check failed", _state);
29265 0 : rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
29266 0 : rmatrixgemv(s->nbasis, s->windowwidth, 1.0, &s->basist, 0, 0, 0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth, 0.0, &s->tmp0, 0, _state);
29267 0 : rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, trend, 0, _state);
29268 0 : offs = s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth;
29269 0 : cnt = s->windowwidth;
29270 0 : for(i=0; i<=cnt-1; i++)
29271 : {
29272 0 : noise->ptr.p_double[i] = s->sequencedata.ptr.p_double[offs+i]-trend->ptr.p_double[i];
29273 : }
29274 : }
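/*************************************************************************
Usage sketch: quick trend/noise split of the last window, e.g. inside a
real-time loop (a minimal illustration, assuming the standard C++
wrappers):

    alglib::real_1d_array trend, noise;
    alglib::ae_int_t nticks;
    alglib::ssaanalyzelastwindow(s, trend, noise, nticks);
    double latest = trend[nticks-1]; // denoised value of the newest tick
*************************************************************************/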
29275 :
29276 :
29277 : /*************************************************************************
29278 : This function:
29279 : * builds SSA basis using internally stored (entire) dataset
29280 : * returns reconstruction for the last NTicks of the last sequence
29281 :
29282 : If you want to analyze some other sequence, use ssaanalyzesequence().
29283 :
29284 : Reconstruction phase involves generation of NTicks-WindowWidth sliding
29285 : windows, their decomposition using empirical orthogonal functions found by
29286 : SSA, followed by averaging of each data point across several overlapping
29287 : windows. Thus, every point in the output trend is reconstructed using up
29288 : to WindowWidth overlapping windows (WindowWidth windows exactly in the
29289 : inner points, just one window at the extremal points).
29290 :
29291 : IMPORTANT: due to averaging this function returns different results for
29292 : different values of NTicks. It is expected and not a bug.
29293 :
29294 : For example:
29295 : * Trend[NTicks-1] is always same because it is not averaged in
29296 : any case (same applies to Trend[0]).
29297 : * Trend[NTicks-2] has different values for NTicks=WindowWidth
29298 : and NTicks=WindowWidth+1 because former case means that no
29299 : averaging is performed, and latter case means that averaging
29300 : using two sliding windows is performed. Larger values of
29301 : NTicks produce same results as NTicks=WindowWidth+1.
29302 : * ...and so on...
29303 :
29304 : PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
29305 : running time. If you work in time-constrained setting and
29306 : have to analyze just a few last ticks, choosing NTicks equal
29307 : to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth
29308 : will result in good compromise between noise cancellation and
29309 : analysis speed.
29310 :
29311 : INPUT PARAMETERS:
29312 : S - SSA model
29313 : NTicks - number of ticks to analyze, NTicks>=1.
29314 : * special case of NTicks<=WindowWidth is handled
29315 : by analyzing last window and returning NTicks
29316 : last ticks.
29317 : * special case NTicks>LastSequenceLen is handled
29318 : by prepending result with NTicks-LastSequenceLen
29319 : zeros.
29320 :
29321 : OUTPUT PARAMETERS:
29322 : Trend - array[NTicks], reconstructed trend line
29323 : Noise - array[NTicks], the rest of the signal;
29324 : it holds that ActualData = Trend+Noise.
29325 :
29326 :
29327 : CACHING/REUSE OF THE BASIS
29328 :
29329 : Caching/reuse of previous results is performed:
29330 : * first call performs full run of SSA; basis is stored in the cache
29331 : * subsequent calls reuse previously cached basis
29332 : * if you call any function which changes model properties (window length,
29333 : algorithm, dataset), internal basis will be invalidated.
29334 : * the only calls which do NOT invalidate basis are listed below:
29335 : a) ssasetwindow() with same window length
29336 : b) ssaappendpointandupdate()
29337 : c) ssaappendsequenceandupdate()
29338 : d) ssasetalgotopk...() with exactly same K
29339 : Calling these functions will result in reuse of previously found basis.
29340 :
29341 : In any case, only basis is reused. Reconstruction is performed from
29342 : scratch every time you call this function.
29343 :
29344 :
29345 : HANDLING OF DEGENERATE CASES
29346 :
29347 : Following degenerate cases may happen:
29348 : * dataset is empty (no analysis can be done)
29349 : * all sequences are shorter than the window length, no analysis can be done
29350 : * no algorithm is specified (no analysis can be done)
29351 : * last sequence is shorter than the window length (analysis can be done,
29352 : but we can not perform reconstruction on the last sequence)
29353 :
29354 : Calling this function in degenerate cases returns following result:
29355 : * in any case, NTicks ticks are returned
29356 : * trend is assumed to be zero
29357 : * noise is initialized by the last sequence; if last sequence is shorter
29358 : than the window size, it is moved to the end of the array, and the
29359 : beginning of the noise array is filled by zeros
29360 :
29361 : No analysis is performed in degenerate cases (we immediately return dummy
29362 : values, no basis is constructed).
29363 :
29364 : -- ALGLIB --
29365 : Copyright 30.10.2017 by Bochkanov Sergey
29366 : *************************************************************************/
29367 0 : void ssaanalyzelast(ssamodel* s,
29368 : ae_int_t nticks,
29369 : /* Real */ ae_vector* trend,
29370 : /* Real */ ae_vector* noise,
29371 : ae_state *_state)
29372 : {
29373 : ae_int_t i;
29374 : ae_int_t offs;
29375 : ae_int_t cnt;
29376 : ae_int_t cntzeros;
29377 :
29378 0 : ae_vector_clear(trend);
29379 0 : ae_vector_clear(noise);
29380 :
29381 0 : ae_assert(nticks>=1, "SSAAnalyzeLast: NTicks<1", _state);
29382 :
29383 : /*
29384 : * Init
29385 : */
29386 0 : ae_vector_set_length(trend, nticks, _state);
29387 0 : ae_vector_set_length(noise, nticks, _state);
29388 :
29389 : /*
29390 : * Is it degenerate case?
29391 : */
29392 0 : if( !ssa_hassomethingtoanalyze(s, _state)||!ssa_issequencebigenough(s, -1, _state) )
29393 : {
29394 0 : for(i=0; i<=nticks-1; i++)
29395 : {
29396 0 : trend->ptr.p_double[i] = (double)(0);
29397 0 : noise->ptr.p_double[i] = (double)(0);
29398 : }
29399 0 : if( s->nsequences>=1 )
29400 : {
29401 0 : cnt = ae_minint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], nticks, _state);
29402 0 : offs = s->sequenceidx.ptr.p_int[s->nsequences]-cnt;
29403 0 : for(i=0; i<=cnt-1; i++)
29404 : {
29405 0 : noise->ptr.p_double[nticks-cnt+i] = s->sequencedata.ptr.p_double[offs+i];
29406 : }
29407 : }
29408 0 : return;
29409 : }
29410 :
29411 : /*
29412 : * Fast exit: NTicks<=WindowWidth, just last window is analyzed
29413 : */
29414 0 : if( nticks<=s->windowwidth )
29415 : {
29416 0 : ssaanalyzelastwindow(s, &s->alongtrend, &s->alongnoise, &cnt, _state);
29417 0 : offs = s->windowwidth-nticks;
29418 0 : for(i=0; i<=nticks-1; i++)
29419 : {
29420 0 : trend->ptr.p_double[i] = s->alongtrend.ptr.p_double[offs+i];
29421 0 : noise->ptr.p_double[i] = s->alongnoise.ptr.p_double[offs+i];
29422 : }
29423 0 : return;
29424 : }
29425 :
29426 : /*
29427 : * Update basis.
29428 : *
29429 : * It will take care of basis validity flags. AppendLen=0 which means
29430 : * that we perform initial basis evaluation.
29431 : */
29432 0 : ssa_updatebasis(s, 0, 0.0, _state);
29433 :
29434 : /*
29435 : * Perform analysis:
29436 : * * prepend max(NTicks-LastSequenceLength,0) zeros to the beginning
29437 : * of array
29438 : * * analyze the rest with AnalyzeSequence() which assumes that we
29439 : * already have basis
29440 : */
29441 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>=s->windowwidth, "SSAAnalyzeLast: integrity check failed / 23vd4", _state);
29442 0 : cntzeros = ae_maxint(nticks-(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]), 0, _state);
29443 0 : for(i=0; i<=cntzeros-1; i++)
29444 : {
29445 0 : trend->ptr.p_double[i] = 0.0;
29446 0 : noise->ptr.p_double[i] = 0.0;
29447 : }
29448 0 : cnt = ae_minint(nticks, s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1], _state);
29449 0 : ssa_analyzesequence(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-cnt, s->sequenceidx.ptr.p_int[s->nsequences], trend, noise, cntzeros, _state);
29450 : }
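
A minimal usage sketch of the caching behaviour documented above, written
against ALGLIB's public C++ interface; the wrapper names (ssacreate,
ssasetwindow, ssaaddsequence, ssasetalgotopkdirect) and the data values are
assumptions made for illustration, not part of this listing.

    #include "dataanalysis.h"
    using namespace alglib;

    void demo_ssaanalyzelast()
    {
        ssamodel s;
        real_1d_array x = "[0.5,1.1,1.9,1.2,0.4,-0.7,-1.4,-0.8,0.1,0.9]";
        real_1d_array trend, noise;
        ssacreate(s);
        ssasetwindow(s, 4);                  // window width W=4
        ssaaddsequence(s, x);
        ssasetalgotopkdirect(s, 2);          // keep 2 leading components
        ssaanalyzelast(s, 10, trend, noise); // first call: full SSA run,
                                             // basis is computed and cached
        ssaanalyzelast(s, 10, trend, noise); // second call: cached basis is
                                             // reused, only reconstruction
                                             // is repeated
    }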
29451 :
29452 :
29453 : /*************************************************************************
29454 : This function:
29455 : * builds SSA basis using internally stored (entire) dataset
29456 : * returns reconstruction for the sequence being passed to this function
29457 :
29458 : If you want to analyze last sequence stored in the model, use
29459 : ssaanalyzelast().
29460 :
29461 : Reconstruction phase involves generation of NTicks-WindowWidth+1 sliding
29462 : windows, their decomposition using empirical orthogonal functions found by
29463 : SSA, followed by averaging of each data point across several overlapping
29464 : windows. Thus, every point in the output trend is reconstructed using up
29465 : to WindowWidth overlapping windows (WindowWidth windows exactly in the
29466 : inner points, just one window at the extremal points).
29467 :
29468 : PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
29469 : running time. If you work in time-constrained setting and
29470 : have to analyze just a few last ticks, choosing NTicks equal
29471 : to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth
29472 : will result in good compromise between noise cancellation and
29473 : will result in a good compromise between noise cancellation and
29474 :
29475 : INPUT PARAMETERS:
29476 : S - SSA model
29477 : Data - array[NTicks], can be larger (only NTicks leading
29478 : elements will be used)
29479 : NTicks - number of ticks to analyze, NTicks>=1.
29480 : * special case of NTicks<WindowWidth is handled
29481 : by returning zeros as trend, and signal as noise
29482 :
29483 : OUTPUT PARAMETERS:
29484 : Trend - array[NTicks], reconstructed trend line
29485 : Noise - array[NTicks], the rest of the signal;
29486 : it holds that ActualData = Trend+Noise.
29487 :
29488 :
29489 : CACHING/REUSE OF THE BASIS
29490 :
29491 : Caching/reuse of previous results is performed:
29492 : * first call performs full run of SSA; basis is stored in the cache
29493 : * subsequent calls reuse previously cached basis
29494 : * if you call any function which changes model properties (window length,
29495 : algorithm, dataset), internal basis will be invalidated.
29496 : * the only calls which do NOT invalidate basis are listed below:
29497 : a) ssasetwindow() with same window length
29498 : b) ssaappendpointandupdate()
29499 : c) ssaappendsequenceandupdate()
29500 : d) ssasetalgotopk...() with exactly same K
29501 : Calling these functions will result in reuse of previously found basis.
29502 :
29503 : In any case, only basis is reused. Reconstruction is performed from
29504 : scratch every time you call this function.
29505 :
29506 :
29507 : HANDLING OF DEGENERATE CASES
29508 :
29509 : The following degenerate cases may happen:
29510 : * dataset is empty (no analysis can be done)
29511 : * all sequences are shorter than the window length, no analysis can be done
29512 : * no algorithm is specified (no analysis can be done)
29513 : * sequence being passed is shorter than the window length
29514 :
29515 : Calling this function in degenerate cases returns the following result:
29516 : * in any case, NTicks ticks are returned
29517 : * trend is assumed to be zero
29518 : * noise is initialized by the sequence.
29519 :
29520 : No analysis is performed in degenerate cases (we immediately return dummy
29521 : values, no basis is constructed).
29522 :
29523 : -- ALGLIB --
29524 : Copyright 30.10.2017 by Bochkanov Sergey
29525 : *************************************************************************/
29526 0 : void ssaanalyzesequence(ssamodel* s,
29527 : /* Real */ ae_vector* data,
29528 : ae_int_t nticks,
29529 : /* Real */ ae_vector* trend,
29530 : /* Real */ ae_vector* noise,
29531 : ae_state *_state)
29532 : {
29533 : ae_int_t i;
29534 :
29535 0 : ae_vector_clear(trend);
29536 0 : ae_vector_clear(noise);
29537 :
29538 0 : ae_assert(nticks>=1, "SSAAnalyzeSequence: NTicks<1", _state);
29539 0 : ae_assert(data->cnt>=nticks, "SSAAnalyzeSequence: Data is too short", _state);
29540 0 : ae_assert(isfinitevector(data, nticks, _state), "SSAAnalyzeSequence: Data contains infinities or NANs", _state);
29541 :
29542 : /*
29543 : * Init
29544 : */
29545 0 : ae_vector_set_length(trend, nticks, _state);
29546 0 : ae_vector_set_length(noise, nticks, _state);
29547 :
29548 : /*
29549 : * Is it degenerate case?
29550 : */
29551 0 : if( !ssa_hassomethingtoanalyze(s, _state)||nticks<s->windowwidth )
29552 : {
29553 0 : for(i=0; i<=nticks-1; i++)
29554 : {
29555 0 : trend->ptr.p_double[i] = (double)(0);
29556 0 : noise->ptr.p_double[i] = data->ptr.p_double[i];
29557 : }
29558 0 : return;
29559 : }
29560 :
29561 : /*
29562 : * Update basis.
29563 : *
29564 : * It will take care of basis validity flags. AppendLen=0 which means
29565 : * that we perform initial basis evaluation.
29566 : */
29567 0 : ssa_updatebasis(s, 0, 0.0, _state);
29568 :
29569 : /*
29570 : * Perform analysis
29571 : */
29572 0 : ssa_analyzesequence(s, data, 0, nticks, trend, noise, 0, _state);
29573 : }
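
A short sketch of analyzing an out-of-sample sequence with the basis built
from the internally stored dataset, assuming the model s configured in the
sketch after ssaanalyzelast() and the ALGLIB overload which infers NTicks
from the array length; the values are illustrative.

    real_1d_array y = "[0.4,1.0,1.8,1.3,0.3,-0.6]";
    real_1d_array trend, noise;
    ssaanalyzesequence(s, y, trend, noise); // basis from the stored dataset
                                            // is applied to the new sequence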
29574 :
29575 :
29576 : /*************************************************************************
29577 : This function builds SSA basis and performs forecasting for a specified
29578 : number of ticks, returning value of trend.
29579 :
29580 : Forecast is performed as follows:
29581 : * SSA trend extraction is applied to last WindowWidth elements of the
29582 : internally stored dataset; this step is basically a noise reduction.
29583 : * linear recurrence relation is applied to extracted trend
29584 :
29585 : This function has the following running time:
29586 : * O(NBasis*WindowWidth) for trend extraction phase (always performed)
29587 : * O(WindowWidth*NTicks) for forecast phase
29588 :
29589 : NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
29590 : apply recurrence relation to raw unprocessed data, use another
29591 : function - ssaforecastsequence() which allows you to turn the
29592 : noise reduction phase on and off.
29593 :
29594 : NOTE: this algorithm performs prediction using only one - last - sliding
29595 : window. Predictions produced by such approach are smooth
29596 : continuations of the reconstructed trend line, but they can be
29597 : easily corrupted by noise. If you need noise-resistant prediction,
29598 : use ssaforecastavglast() function, which averages predictions built
29599 : using several sliding windows.
29600 :
29601 : INPUT PARAMETERS:
29602 : S - SSA model
29603 : NTicks - number of ticks to forecast, NTicks>=1
29604 :
29605 : OUTPUT PARAMETERS:
29606 : Trend - array[NTicks], predicted trend line
29607 :
29608 :
29609 : CACHING/REUSE OF THE BASIS
29610 :
29611 : Caching/reuse of previous results is performed:
29612 : * first call performs full run of SSA; basis is stored in the cache
29613 : * subsequent calls reuse previously cached basis
29614 : * if you call any function which changes model properties (window length,
29615 : algorithm, dataset), internal basis will be invalidated.
29616 : * the only calls which do NOT invalidate basis are listed below:
29617 : a) ssasetwindow() with same window length
29618 : b) ssaappendpointandupdate()
29619 : c) ssaappendsequenceandupdate()
29620 : d) ssasetalgotopk...() with exactly same K
29621 : Calling these functions will result in reuse of previously found basis.
29622 :
29623 :
29624 : HANDLING OF DEGENERATE CASES
29625 :
29626 : The following degenerate cases may happen:
29627 : * dataset is empty (no analysis can be done)
29628 : * all sequences are shorter than the window length, no analysis can be done
29629 : * no algorithm is specified (no analysis can be done)
29630 : * last sequence is shorter than the WindowWidth (analysis can be done,
29631 : but we can not perform forecasting on the last sequence)
29632 : * window length is 1 (impossible to use for forecasting)
29633 : * SSA analysis algorithm is configured to extract basis whose size is
29634 : equal to window length (impossible to use for forecasting; only basis
29635 : whose size is less than window length can be used).
29636 :
29637 : Calling this function in degenerate cases returns the following result:
29638 : * NTicks copies of the last value are returned for non-empty task with
29639 : large enough dataset, but with overcomplete basis (window width=1 or
29640 : basis size is equal to window width)
29641 : * zero trend with length=NTicks is returned for empty task
29642 :
29643 : No analysis is performed in degenerate cases (we immediately return dummy
29644 : values, no basis is ever constructed).
29645 :
29646 : -- ALGLIB --
29647 : Copyright 30.10.2017 by Bochkanov Sergey
29648 : *************************************************************************/
29649 0 : void ssaforecastlast(ssamodel* s,
29650 : ae_int_t nticks,
29651 : /* Real */ ae_vector* trend,
29652 : ae_state *_state)
29653 : {
29654 : ae_int_t i;
29655 : ae_int_t j;
29656 : double v;
29657 : ae_int_t winw;
29658 :
29659 0 : ae_vector_clear(trend);
29660 :
29661 0 : ae_assert(nticks>=1, "SSAForecast: NTicks<1", _state);
29662 :
29663 : /*
29664 : * Init
29665 : */
29666 0 : winw = s->windowwidth;
29667 0 : ae_vector_set_length(trend, nticks, _state);
29668 :
29669 : /*
29670 : * Is it degenerate case?
29671 : */
29672 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
29673 : {
29674 0 : for(i=0; i<=nticks-1; i++)
29675 : {
29676 0 : trend->ptr.p_double[i] = (double)(0);
29677 : }
29678 0 : return;
29679 : }
29680 0 : ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed", _state);
29681 0 : if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
29682 : {
29683 0 : for(i=0; i<=nticks-1; i++)
29684 : {
29685 0 : trend->ptr.p_double[i] = (double)(0);
29686 : }
29687 0 : return;
29688 : }
29689 0 : if( winw==1 )
29690 : {
29691 0 : ae_assert(s->nsequences>0, "SSAForecast: integrity check failed / 2355", _state);
29692 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecast: integrity check failed", _state);
29693 0 : for(i=0; i<=nticks-1; i++)
29694 : {
29695 0 : trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
29696 : }
29697 0 : return;
29698 : }
29699 :
29700 : /*
29701 : * Update basis and recurrent relation.
29702 : *
29703 : * It will take care of basis validity flags. AppendLen=0 which means
29704 : * that we perform initial basis evaluation.
29705 : */
29706 0 : ssa_updatebasis(s, 0, 0.0, _state);
29707 0 : ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecast: integrity check failed / 4f5et", _state);
29708 0 : if( s->nbasis==winw )
29709 : {
29710 :
29711 : /*
29712 : * Handle degenerate situation with basis whose size
29713 : * is equal to window length.
29714 : */
29715 0 : ae_assert(s->nsequences>0, "SSAForecast: integrity check failed / 2355", _state);
29716 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecast: integrity check failed", _state);
29717 0 : for(i=0; i<=nticks-1; i++)
29718 : {
29719 0 : trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
29720 : }
29721 0 : return;
29722 : }
29723 :
29724 : /*
29725 : * Apply recurrent formula for SSA forecasting:
29726 : * * first, perform smoothing of the last window
29727 : * * second, perform analysis phase
29728 : */
29729 0 : ae_assert(s->nsequences>0, "SSAForecastLast: integrity check failed", _state);
29730 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>=s->windowwidth, "SSAForecastLast: integrity check failed", _state);
29731 0 : rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
29732 0 : rvectorsetlengthatleast(&s->fctrend, s->windowwidth, _state);
29733 0 : rmatrixgemv(s->nbasis, s->windowwidth, 1.0, &s->basist, 0, 0, 0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-s->windowwidth, 0.0, &s->tmp0, 0, _state);
29734 0 : rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->fctrend, 0, _state);
29735 0 : rvectorsetlengthatleast(&s->tmp1, winw-1, _state);
29736 0 : for(i=1; i<=winw-1; i++)
29737 : {
29738 0 : s->tmp1.ptr.p_double[i-1] = s->fctrend.ptr.p_double[i];
29739 : }
29740 0 : for(i=0; i<=nticks-1; i++)
29741 : {
29742 0 : v = s->forecasta.ptr.p_double[0]*s->tmp1.ptr.p_double[0];
29743 0 : for(j=1; j<=winw-2; j++)
29744 : {
29745 0 : v = v+s->forecasta.ptr.p_double[j]*s->tmp1.ptr.p_double[j];
29746 0 : s->tmp1.ptr.p_double[j-1] = s->tmp1.ptr.p_double[j];
29747 : }
29748 0 : trend->ptr.p_double[i] = v;
29749 0 : s->tmp1.ptr.p_double[winw-2] = v;
29750 : }
29751 : }
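
The recurrence step applied above, restated as a freestanding sketch (not
part of ALGLIB): a[0..W-2] plays the role of ForecastA, and buf[0..W-2]
holds the last W-1 smoothed trend values.

    // Applies the linear recurrence x[t] = sum(j=0..W-2, a[j]*x[t-W+1+j])
    // nticks times, shifting the window buffer one tick per prediction.
    static void lrr_forecast(const double *a, double *buf, int winw,
                             double *out, int nticks)
    {
        for(int i=0; i<nticks; i++)
        {
            double v = a[0]*buf[0];
            for(int j=1; j<=winw-2; j++)
            {
                v += a[j]*buf[j];
                buf[j-1] = buf[j];   // shift window one tick forward
            }
            out[i] = v;
            buf[winw-2] = v;         // newest prediction enters the window
        }
    }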
29752 :
29753 :
29754 : /*************************************************************************
29755 : This function builds SSA basis and performs forecasting for a user-
29756 : specified sequence, returning value of trend.
29757 :
29758 : Forecasting is done in two stages:
29759 : * first, we extract trend from the WindowWidth last elements of the
29760 : sequence. This stage is optional, you can turn it off if you pass
29761 : data which are already processed with SSA. Of course, you can turn it
29762 : off even for raw data, but it is not recommended - noise suppression is
29763 : very important for correct prediction.
29764 : * then, we apply LRR for last WindowWidth-1 elements of the extracted
29765 : trend.
29766 :
29767 : This function has the following running time:
29768 : * O(NBasis*WindowWidth) for trend extraction phase
29769 : * O(WindowWidth*NTicks) for forecast phase
29770 :
29771 : NOTE: this algorithm performs prediction using only one - last - sliding
29772 : window. Predictions produced by such approach are smooth
29773 : continuations of the reconstructed trend line, but they can be
29774 : easily corrupted by noise. If you need noise-resistant prediction,
29775 : use ssaforecastavgsequence() function, which averages predictions
29776 : built using several sliding windows.
29777 :
29778 : INPUT PARAMETERS:
29779 : S - SSA model
29780 : Data - array[NTicks], data to forecast
29781 : DataLen - number of ticks in the data, DataLen>=1
29782 : ForecastLen - number of ticks to predict, ForecastLen>=1
29783 : ApplySmoothing - whether to apply smoothing trend extraction or not;
29784 : if you do not know what to specify, pass True.
29785 :
29786 : OUTPUT PARAMETERS:
29787 : Trend - array[ForecastLen], forecasted trend
29788 :
29789 :
29790 : CACHING/REUSE OF THE BASIS
29791 :
29792 : Caching/reuse of previous results is performed:
29793 : * first call performs full run of SSA; basis is stored in the cache
29794 : * subsequent calls reuse previously cached basis
29795 : * if you call any function which changes model properties (window length,
29796 : algorithm, dataset), internal basis will be invalidated.
29797 : * the only calls which do NOT invalidate basis are listed below:
29798 : a) ssasetwindow() with same window length
29799 : b) ssaappendpointandupdate()
29800 : c) ssaappendsequenceandupdate()
29801 : d) ssasetalgotopk...() with exactly same K
29802 : Calling these functions will result in reuse of previously found basis.
29803 :
29804 :
29805 : HANDLING OF DEGENERATE CASES
29806 :
29807 : The following degenerate cases may happen:
29808 : * dataset is empty (no analysis can be done)
29809 : * all sequences are shorter than the window length, no analysis can be done
29810 : * no algorithm is specified (no analysis can be done)
29811 : * data sequence is shorter than the WindowWidth (analysis can be done,
29812 : but we can not perform forecasting on the last sequence)
29813 : * window length is 1 (impossible to use for forecasting)
29814 : * SSA analysis algorithm is configured to extract basis whose size is
29815 : equal to window length (impossible to use for forecasting; only basis
29816 : whose size is less than window length can be used).
29817 :
29818 : Calling this function in degenerate cases returns the following result:
29819 : * ForecastLen copies of the last value are returned for non-empty task with
29820 : large enough dataset, but with overcomplete basis (window width=1 or
29821 : basis size is equal to window width)
29822 : * zero trend with length=ForecastLen is returned for empty task
29823 :
29824 : No analysis is performed in degenerate cases (we immediately return dummy
29825 : values, no basis is ever constructed).
29826 :
29827 : -- ALGLIB --
29828 : Copyright 30.10.2017 by Bochkanov Sergey
29829 : *************************************************************************/
29830 0 : void ssaforecastsequence(ssamodel* s,
29831 : /* Real */ ae_vector* data,
29832 : ae_int_t datalen,
29833 : ae_int_t forecastlen,
29834 : ae_bool applysmoothing,
29835 : /* Real */ ae_vector* trend,
29836 : ae_state *_state)
29837 : {
29838 : ae_int_t i;
29839 : ae_int_t j;
29840 : double v;
29841 : ae_int_t winw;
29842 :
29843 0 : ae_vector_clear(trend);
29844 :
29845 0 : ae_assert(datalen>=1, "SSAForecastSequence: DataLen<1", _state);
29846 0 : ae_assert(data->cnt>=datalen, "SSAForecastSequence: Data is too short", _state);
29847 0 : ae_assert(isfinitevector(data, datalen, _state), "SSAForecastSequence: Data contains infinities or NANs", _state);
29848 0 : ae_assert(forecastlen>=1, "SSAForecastSequence: ForecastLen<1", _state);
29849 :
29850 : /*
29851 : * Init
29852 : */
29853 0 : winw = s->windowwidth;
29854 0 : ae_vector_set_length(trend, forecastlen, _state);
29855 :
29856 : /*
29857 : * Is it degenerate case?
29858 : */
29859 0 : if( !ssa_hassomethingtoanalyze(s, _state)||datalen<winw )
29860 : {
29861 0 : for(i=0; i<=forecastlen-1; i++)
29862 : {
29863 0 : trend->ptr.p_double[i] = (double)(0);
29864 : }
29865 0 : return;
29866 : }
29867 0 : if( winw==1 )
29868 : {
29869 0 : for(i=0; i<=forecastlen-1; i++)
29870 : {
29871 0 : trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
29872 : }
29873 0 : return;
29874 : }
29875 :
29876 : /*
29877 : * Update basis.
29878 : *
29879 : * It will take care of basis validity flags. AppendLen=0 which means
29880 : * that we perform initial basis evaluation.
29881 : */
29882 0 : ssa_updatebasis(s, 0, 0.0, _state);
29883 0 : ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecast: integrity check failed / 4f5et", _state);
29884 0 : if( s->nbasis==winw )
29885 : {
29886 :
29887 : /*
29888 : * Handle degenerate situation with basis whose size
29889 : * is equal to window length.
29890 : */
29891 0 : for(i=0; i<=forecastlen-1; i++)
29892 : {
29893 0 : trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
29894 : }
29895 0 : return;
29896 : }
29897 :
29898 : /*
29899 : * Perform trend extraction
29900 : */
29901 0 : rvectorsetlengthatleast(&s->fctrend, s->windowwidth, _state);
29902 0 : if( applysmoothing )
29903 : {
29904 0 : ae_assert(datalen>=winw, "SSAForecastSequence: integrity check failed", _state);
29905 0 : rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
29906 0 : rmatrixgemv(s->nbasis, winw, 1.0, &s->basist, 0, 0, 0, data, datalen-winw, 0.0, &s->tmp0, 0, _state);
29907 0 : rmatrixgemv(winw, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->fctrend, 0, _state);
29908 : }
29909 : else
29910 : {
29911 0 : for(i=0; i<=winw-1; i++)
29912 : {
29913 0 : s->fctrend.ptr.p_double[i] = data->ptr.p_double[datalen+i-winw];
29914 : }
29915 : }
29916 :
29917 : /*
29918 : * Apply recurrent formula for SSA forecasting
29919 : */
29920 0 : rvectorsetlengthatleast(&s->tmp1, winw-1, _state);
29921 0 : for(i=1; i<=winw-1; i++)
29922 : {
29923 0 : s->tmp1.ptr.p_double[i-1] = s->fctrend.ptr.p_double[i];
29924 : }
29925 0 : for(i=0; i<=forecastlen-1; i++)
29926 : {
29927 0 : v = s->forecasta.ptr.p_double[0]*s->tmp1.ptr.p_double[0];
29928 0 : for(j=1; j<=winw-2; j++)
29929 : {
29930 0 : v = v+s->forecasta.ptr.p_double[j]*s->tmp1.ptr.p_double[j];
29931 0 : s->tmp1.ptr.p_double[j-1] = s->tmp1.ptr.p_double[j];
29932 : }
29933 0 : trend->ptr.p_double[i] = v;
29934 0 : s->tmp1.ptr.p_double[winw-2] = v;
29935 : }
29936 : }
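
A sketch of calling ssaforecastsequence() with and without smoothing,
assuming the full wrapper signature (data, DataLen, ForecastLen,
ApplySmoothing, trend) and a model s configured as in the earlier sketches;
the history values are illustrative.

    real_1d_array hist = "[1.0,1.6,1.2,0.5,-0.2,-0.9,-0.6,0.2]";
    real_1d_array fc1, fc2;
    ssaforecastsequence(s, hist, 8, 3, true,  fc1); // extract trend, then LRR
    ssaforecastsequence(s, hist, 8, 3, false, fc2); // LRR on raw values; only
                                                    // sensible for data that
                                                    // were already smoothed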
29937 :
29938 :
29939 : /*************************************************************************
29940 : This function builds SSA basis and performs forecasting for a specified
29941 : number of ticks, returning value of trend.
29942 :
29943 : Forecast is performed as follows:
29944 : * SSA trend extraction is applied to last M sliding windows of the
29945 : internally stored dataset
29946 : * for each of M sliding windows, M predictions are built
29947 : * average value of M predictions is returned
29948 :
29949 : This function has the following running time:
29950 : * O(NBasis*WindowWidth*M) for trend extraction phase (always performed)
29951 : * O(WindowWidth*NTicks*M) for forecast phase
29952 :
29953 : NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
29954 : apply recurrence relation to raw unprocessed data, use another
29955 : function - ssaforecastsequence() which allows you to turn the
29956 : noise reduction phase on and off.
29957 :
29958 : NOTE: combination of several predictions results in lesser sensitivity to
29959 : noise, but it may produce undesirable discontinuities between last
29960 : point of the trend and first point of the prediction. The reason is
29961 : that last point of the trend is usually corrupted by noise, but
29962 : average value of several predictions is less sensitive to noise,
29963 : thus a discontinuity appears. It is not a bug.
29964 :
29965 : INPUT PARAMETERS:
29966 : S - SSA model
29967 : M - number of sliding windows to combine, M>=1. If
29968 : your dataset has less than M sliding windows, this
29969 : parameter will be silently reduced.
29970 : NTicks - number of ticks to forecast, NTicks>=1
29971 :
29972 : OUTPUT PARAMETERS:
29973 : Trend - array[NTicks], predicted trend line
29974 :
29975 :
29976 : CACHING/REUSE OF THE BASIS
29977 :
29978 : Caching/reuse of previous results is performed:
29979 : * first call performs full run of SSA; basis is stored in the cache
29980 : * subsequent calls reuse previously cached basis
29981 : * if you call any function which changes model properties (window length,
29982 : algorithm, dataset), internal basis will be invalidated.
29983 : * the only calls which do NOT invalidate basis are listed below:
29984 : a) ssasetwindow() with same window length
29985 : b) ssaappendpointandupdate()
29986 : c) ssaappendsequenceandupdate()
29987 : d) ssasetalgotopk...() with exactly same K
29988 : Calling these functions will result in reuse of previously found basis.
29989 :
29990 :
29991 : HANDLING OF DEGENERATE CASES
29992 :
29993 : The following degenerate cases may happen:
29994 : * dataset is empty (no analysis can be done)
29995 : * all sequences are shorter than the window length, no analysis can be done
29996 : * no algorithm is specified (no analysis can be done)
29997 : * last sequence is shorter than the WindowWidth (analysis can be done,
29998 : but we can not perform forecasting on the last sequence)
29999 : * window length is 1 (impossible to use for forecasting)
30000 : * SSA analysis algorithm is configured to extract basis whose size is
30001 : equal to window length (impossible to use for forecasting; only basis
30002 : whose size is less than window length can be used).
30003 :
30004 : Calling this function in degenerate cases returns the following result:
30005 : * NTicks copies of the last value are returned for non-empty task with
30006 : large enough dataset, but with overcomplete basis (window width=1 or
30007 : basis size is equal to window width)
30008 : * zero trend with length=NTicks is returned for empty task
30009 :
30010 : No analysis is performed in degenerate cases (we immediately return dummy
30011 : values, no basis is ever constructed).
30012 :
30013 : -- ALGLIB --
30014 : Copyright 30.10.2017 by Bochkanov Sergey
30015 : *************************************************************************/
30016 0 : void ssaforecastavglast(ssamodel* s,
30017 : ae_int_t m,
30018 : ae_int_t nticks,
30019 : /* Real */ ae_vector* trend,
30020 : ae_state *_state)
30021 : {
30022 : ae_int_t i;
30023 : ae_int_t winw;
30024 :
30025 0 : ae_vector_clear(trend);
30026 :
30027 0 : ae_assert(nticks>=1, "SSAForecastAvgLast: NTicks<1", _state);
30028 0 : ae_assert(m>=1, "SSAForecastAvgLast: M<1", _state);
30029 :
30030 : /*
30031 : * Init
30032 : */
30033 0 : winw = s->windowwidth;
30034 0 : ae_vector_set_length(trend, nticks, _state);
30035 :
30036 : /*
30037 : * Is it degenerate case?
30038 : */
30039 0 : if( !ssa_hassomethingtoanalyze(s, _state) )
30040 : {
30041 0 : for(i=0; i<=nticks-1; i++)
30042 : {
30043 0 : trend->ptr.p_double[i] = (double)(0);
30044 : }
30045 0 : return;
30046 : }
30047 0 : ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed", _state);
30048 0 : if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
30049 : {
30050 0 : for(i=0; i<=nticks-1; i++)
30051 : {
30052 0 : trend->ptr.p_double[i] = (double)(0);
30053 : }
30054 0 : return;
30055 : }
30056 0 : if( winw==1 )
30057 : {
30058 0 : ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed / 2355", _state);
30059 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastAvgLast: integrity check failed", _state);
30060 0 : for(i=0; i<=nticks-1; i++)
30061 : {
30062 0 : trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
30063 : }
30064 0 : return;
30065 : }
30066 :
30067 : /*
30068 : * Update basis and recurrent relation.
30069 : *
30070 : * It will take care of basis validity flags. AppendLen=0 which means
30071 : * that we perform initial basis evaluation.
30072 : */
30073 0 : ssa_updatebasis(s, 0, 0.0, _state);
30074 0 : ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecastAvgLast: integrity check failed / 4f5et", _state);
30075 0 : if( s->nbasis==winw )
30076 : {
30077 :
30078 : /*
30079 : * Handle degenerate situation with basis whose size
30080 : * is equal to window length.
30081 : */
30082 0 : ae_assert(s->nsequences>0, "SSAForecastAvgLast: integrity check failed / 2355", _state);
30083 0 : ae_assert(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]>0, "SSAForecastAvgLast: integrity check failed", _state);
30084 0 : for(i=0; i<=nticks-1; i++)
30085 : {
30086 0 : trend->ptr.p_double[i] = s->sequencedata.ptr.p_double[s->sequenceidx.ptr.p_int[s->nsequences]-1];
30087 : }
30088 0 : return;
30089 : }
30090 :
30091 : /*
30092 : * Decrease M if we have less than M sliding windows.
30093 : * Forecast.
30094 : */
30095 0 : m = ae_minint(m, s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, _state);
30096 0 : ae_assert(m>=1, "SSAForecastAvgLast: integrity check failed", _state);
30097 0 : ssa_forecastavgsequence(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences-1], s->sequenceidx.ptr.p_int[s->nsequences], m, nticks, ae_true, trend, 0, _state);
30098 : }
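
Single-window versus averaged forecasting, assuming the wrappers used in
the earlier sketches. Averaging over M windows trades a possible
discontinuity at the junction point (see the NOTE above) for lower
sensitivity to noise.

    real_1d_array f1, f2;
    ssaforecastlast(s, 5, f1);        // prediction from the last window only
    ssaforecastavglast(s, 10, 5, f2); // average of up to 10 window
                                      // predictions; M is silently clipped
                                      // to the number of available windows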
30099 :
30100 :
30101 : /*************************************************************************
30102 : This function builds SSA basis and performs forecasting for a user-
30103 : specified sequence, returning value of trend.
30104 :
30105 : Forecasting is done in two stages:
30106 : * first, we extract trend from M last sliding windows of the sequence.
30107 : This stage is optional, you can turn it off if you pass data which
30108 : are already processed with SSA. Of course, you can turn it off even
30109 : for raw data, but it is not recommended - noise suppression is very
30110 : important for correct prediction.
30111 : * then, we apply LRR independently for M sliding windows
30112 : * average of M predictions is returned
30113 :
30114 : This function has the following running time:
30115 : * O(NBasis*WindowWidth*M) for trend extraction phase
30116 : * O(WindowWidth*NTicks*M) for forecast phase
30117 :
30118 : NOTE: combination of several predictions results in lesser sensitivity to
30119 : noise, but it may produce undesirable discontinuities between last
30120 : point of the trend and first point of the prediction. The reason is
30121 : that last point of the trend is usually corrupted by noise, but
30122 : average value of several predictions is less sensitive to noise,
30123 : thus a discontinuity appears. It is not a bug.
30124 :
30125 : INPUT PARAMETERS:
30126 : S - SSA model
30127 : Data - array[NTicks], data to forecast
30128 : DataLen - number of ticks in the data, DataLen>=1
30129 : M - number of sliding windows to combine, M>=1. If
30130 : your dataset has less than M sliding windows, this
30131 : parameter will be silently reduced.
30132 : ForecastLen - number of ticks to predict, ForecastLen>=1
30133 : ApplySmoothing - whether to apply smoothing trend extraction or not;
30134 : if you do not know what to specify, pass True.
30135 :
30136 : OUTPUT PARAMETERS:
30137 : Trend - array[ForecastLen], forecasted trend
30138 :
30139 :
30140 : CACHING/REUSE OF THE BASIS
30141 :
30142 : Caching/reuse of previous results is performed:
30143 : * first call performs full run of SSA; basis is stored in the cache
30144 : * subsequent calls reuse previously cached basis
30145 : * if you call any function which changes model properties (window length,
30146 : algorithm, dataset), internal basis will be invalidated.
30147 : * the only calls which do NOT invalidate basis are listed below:
30148 : a) ssasetwindow() with same window length
30149 : b) ssaappendpointandupdate()
30150 : c) ssaappendsequenceandupdate()
30151 : d) ssasetalgotopk...() with exactly same K
30152 : Calling these functions will result in reuse of previously found basis.
30153 :
30154 :
30155 : HANDLING OF DEGENERATE CASES
30156 :
30157 : The following degenerate cases may happen:
30158 : * dataset is empty (no analysis can be done)
30159 : * all sequences are shorter than the window length, no analysis can be done
30160 : * no algorithm is specified (no analysis can be done)
30161 : * data sequence is shorter than the WindowWidth (analysis can be done,
30162 : but we can not perform forecasting on the last sequence)
30163 : * window length is 1 (impossible to use for forecasting)
30164 : * SSA analysis algorithm is configured to extract basis whose size is
30165 : equal to window length (impossible to use for forecasting; only basis
30166 : whose size is less than window length can be used).
30167 :
30168 : Calling this function in degenerate cases returns the following result:
30169 : * ForecastLen copies of the last value are returned for non-empty task with
30170 : large enough dataset, but with overcomplete basis (window width=1 or
30171 : basis size is equal to window width)
30172 : * zero trend with length=ForecastLen is returned for empty task
30173 :
30174 : No analysis is performed in degenerate cases (we immediately return dummy
30175 : values, no basis is ever constructed).
30176 :
30177 : -- ALGLIB --
30178 : Copyright 30.10.2017 by Bochkanov Sergey
30179 : *************************************************************************/
30180 0 : void ssaforecastavgsequence(ssamodel* s,
30181 : /* Real */ ae_vector* data,
30182 : ae_int_t datalen,
30183 : ae_int_t m,
30184 : ae_int_t forecastlen,
30185 : ae_bool applysmoothing,
30186 : /* Real */ ae_vector* trend,
30187 : ae_state *_state)
30188 : {
30189 : ae_int_t i;
30190 : ae_int_t winw;
30191 :
30192 0 : ae_vector_clear(trend);
30193 :
30194 0 : ae_assert(datalen>=1, "SSAForecastAvgSequence: DataLen<1", _state);
30195 0 : ae_assert(m>=1, "SSAForecastAvgSequence: M<1", _state);
30196 0 : ae_assert(data->cnt>=datalen, "SSAForecastAvgSequence: Data is too short", _state);
30197 0 : ae_assert(isfinitevector(data, datalen, _state), "SSAForecastAvgSequence: Data contains infinities or NANs", _state);
30198 0 : ae_assert(forecastlen>=1, "SSAForecastAvgSequence: ForecastLen<1", _state);
30199 :
30200 : /*
30201 : * Init
30202 : */
30203 0 : winw = s->windowwidth;
30204 0 : ae_vector_set_length(trend, forecastlen, _state);
30205 :
30206 : /*
30207 : * Is it degenerate case?
30208 : */
30209 0 : if( !ssa_hassomethingtoanalyze(s, _state)||datalen<winw )
30210 : {
30211 0 : for(i=0; i<=forecastlen-1; i++)
30212 : {
30213 0 : trend->ptr.p_double[i] = (double)(0);
30214 : }
30215 0 : return;
30216 : }
30217 0 : if( winw==1 )
30218 : {
30219 0 : for(i=0; i<=forecastlen-1; i++)
30220 : {
30221 0 : trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
30222 : }
30223 0 : return;
30224 : }
30225 :
30226 : /*
30227 : * Update basis.
30228 : *
30229 : * It will take care of basis validity flags. AppendLen=0 which means
30230 : * that we perform initial basis evaluation.
30231 : */
30232 0 : ssa_updatebasis(s, 0, 0.0, _state);
30233 0 : ae_assert(s->nbasis<=winw&&s->nbasis>0, "SSAForecast: integrity check failed / 4f5et", _state);
30234 0 : if( s->nbasis==winw )
30235 : {
30236 :
30237 : /*
30238 : * Handle degenerate situation with basis whose size
30239 : * is equal to window length.
30240 : */
30241 0 : for(i=0; i<=forecastlen-1; i++)
30242 : {
30243 0 : trend->ptr.p_double[i] = data->ptr.p_double[datalen-1];
30244 : }
30245 0 : return;
30246 : }
30247 :
30248 : /*
30249 : * Decrease M if we have less than M sliding windows.
30250 : * Forecast.
30251 : */
30252 0 : m = ae_minint(m, datalen-winw+1, _state);
30253 0 : ae_assert(m>=1, "SSAForecastAvgSequence: integrity check failed", _state);
30254 0 : ssa_forecastavgsequence(s, data, 0, datalen, m, forecastlen, applysmoothing, trend, 0, _state);
30255 : }
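
The averaged analogue of the per-sequence forecast, following the parameter
list above (Data, DataLen, M, ForecastLen, ApplySmoothing, Trend); a sketch
with illustrative values and the model s from the earlier sketches.

    real_1d_array hist = "[0.9,1.5,1.1,0.4,-0.3,-1.0,-0.5,0.3,1.0,1.4]";
    real_1d_array fc;
    ssaforecastavgsequence(s, hist, 10, 4, 3, true, fc); // 3-tick forecast
                                                         // averaged over 4
                                                         // sliding windows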
30256 :
30257 :
30258 : /*************************************************************************
30259 : This function evaluates current model and tells whether we have some data
30260 : which can be analyzed by current algorithm, or not.
30261 :
30262 : No analysis can be done in the following degenerate cases:
30263 : * dataset is empty
30264 : * all sequences are shorter than the window length
30265 : * no algorithm is specified
30266 :
30267 : -- ALGLIB --
30268 : Copyright 30.10.2017 by Bochkanov Sergey
30269 : *************************************************************************/
30270 0 : static ae_bool ssa_hassomethingtoanalyze(ssamodel* s, ae_state *_state)
30271 : {
30272 : ae_int_t i;
30273 : ae_bool allsmaller;
30274 : ae_bool isdegenerate;
30275 : ae_bool result;
30276 :
30277 :
30278 0 : isdegenerate = ae_false;
30279 0 : isdegenerate = isdegenerate||s->algotype==0;
30280 0 : isdegenerate = isdegenerate||s->nsequences==0;
30281 0 : allsmaller = ae_true;
30282 0 : for(i=0; i<=s->nsequences-1; i++)
30283 : {
30284 0 : allsmaller = allsmaller&&s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]<s->windowwidth;
30285 : }
30286 0 : isdegenerate = isdegenerate||allsmaller;
30287 0 : result = !isdegenerate;
30288 0 : return result;
30289 : }
30290 :
30291 :
30292 : /*************************************************************************
30293 : This function checks whether I-th sequence is big enough for analysis or not.
30294 :
30295 : I=-1 is used to denote the last sequence; for NSequences=0 the function returns False.
30296 :
30297 : -- ALGLIB --
30298 : Copyright 30.10.2017 by Bochkanov Sergey
30299 : *************************************************************************/
30300 0 : static ae_bool ssa_issequencebigenough(ssamodel* s,
30301 : ae_int_t i,
30302 : ae_state *_state)
30303 : {
30304 : ae_bool result;
30305 :
30306 :
30307 0 : ae_assert(i>=-1&&i<s->nsequences, "Assertion failed", _state);
30308 0 : result = ae_false;
30309 0 : if( s->nsequences==0 )
30310 : {
30311 0 : return result;
30312 : }
30313 0 : if( i<0 )
30314 : {
30315 0 : i = s->nsequences-1;
30316 : }
30317 0 : result = s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]>=s->windowwidth;
30318 0 : return result;
30319 : }
30320 :
30321 :
30322 : /*************************************************************************
30323 : This function performs basis update. Either full update (recalculated from
30324 : the very beginning) or partial update (handles append to the end of the
30325 : dataset).
30326 :
30327 : With AppendLen=0 this function behaves as follows:
30328 : * if AreBasisAndSolverValid=False, then solver object is created from
30329 : scratch, initial calculations are performed according to specific SSA
30330 : algorithm being chosen. Basis/Solver validity flag is set to True, then
30331 : we immediately return.
30332 : * if AreBasisAndSolverValid=True, then nothing is done - we immediately
30333 : return.
30334 :
30335 : With AppendLen>0 this function behaves as follows:
30336 : * if AreBasisAndSolverValid=False, then exception is generated; you can
30337 : append points only to fully constructed basis. Call this function with
30338 : zero AppendLen BEFORE append, then perform append, then call it one more
30339 : time with non-zero AppendLen.
30340 : * if AreBasisAndSolverValid=True, then basis is incrementally updated. It
30341 : also updates recurrence relation used for prediction. It is expected that
30342 : either AppendLen=1, or AppendLen=length(last_sequence). Basis update is
30343 : performed with probability UpdateIts (larger-than-one values mean that
30344 : performed with probability UpdateIts (values larger than one mean that
30345 : some number of iterations is always performed).
30346 :
30347 : In any case, after calling this function we either:
30348 : * have an exception
30349 : * have completely valid basis
30350 :
30351 : IMPORTANT: this function expects that we do NOT call it for degenerate tasks
30352 : (no data). So, call it only after a check with HasSomethingToAnalyze()
30353 : has returned True.
30354 :
30355 : -- ALGLIB --
30356 : Copyright 30.10.2017 by Bochkanov Sergey
30357 : *************************************************************************/
30358 0 : static void ssa_updatebasis(ssamodel* s,
30359 : ae_int_t appendlen,
30360 : double updateits,
30361 : ae_state *_state)
30362 : {
30363 : ae_int_t i;
30364 : ae_int_t j;
30365 : ae_int_t k;
30366 : ae_int_t srcoffs;
30367 : ae_int_t dstoffs;
30368 : ae_int_t winw;
30369 : ae_int_t windowstotal;
30370 : ae_int_t requesttype;
30371 : ae_int_t requestsize;
30372 : double v;
30373 : ae_bool degeneraterecurrence;
30374 : double nu2;
30375 : ae_int_t subspaceits;
30376 : ae_bool needevd;
30377 :
30378 :
30379 0 : winw = s->windowwidth;
30380 :
30381 : /*
30382 : * Critical checks
30383 : */
30384 0 : ae_assert(appendlen>=0, "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
30385 0 : ae_assert(!(!s->arebasisandsolvervalid&&appendlen!=0), "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
30386 0 : ae_assert(!(appendlen==0&&ae_fp_greater(updateits,0.0)), "SSA: incorrect parameters passed to UpdateBasis(), integrity check failed", _state);
30387 :
30388 : /*
30389 : * Everything is OK, nothing to do
30390 : */
30391 0 : if( s->arebasisandsolvervalid&&appendlen==0 )
30392 : {
30393 0 : return;
30394 : }
30395 :
30396 : /*
30397 : * Seed RNG with fixed or random seed.
30398 : *
30399 : * RNG used when pseudorandomly deciding whether
30400 : * to re-evaluate basis or not. Random seed is
30401 : * important when we have several simultaneously
30402 : * calculated SSA models - we do not want them
30403 : * to be re-evaluated at the same moments.
30404 : */
30405 0 : if( !s->arebasisandsolvervalid )
30406 : {
30407 0 : if( s->rngseed>0 )
30408 : {
30409 0 : hqrndseed(s->rngseed, s->rngseed+235, &s->rs, _state);
30410 : }
30411 : else
30412 : {
30413 0 : hqrndrandomize(&s->rs, _state);
30414 : }
30415 : }
30416 :
30417 : /*
30418 : * Compute XXT for algorithms which need it
30419 : */
30420 0 : if( !s->arebasisandsolvervalid )
30421 : {
30422 0 : ae_assert(appendlen==0, "SSA: integrity check failed / 34cx6", _state);
30423 0 : if( s->algotype==2 )
30424 : {
30425 :
30426 : /*
30427 : * Compute X*X^T for direct algorithm.
30428 : * Quite straightforward, no subtle optimizations.
30429 : */
30430 0 : rmatrixsetlengthatleast(&s->xxt, winw, winw, _state);
30431 0 : windowstotal = 0;
30432 0 : for(i=0; i<=s->nsequences-1; i++)
30433 : {
30434 0 : windowstotal = windowstotal+ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state);
30435 : }
30436 0 : ae_assert(windowstotal>0, "SSA: integrity check in UpdateBasis() failed / 76t34", _state);
30437 0 : for(i=0; i<=winw-1; i++)
30438 : {
30439 0 : for(j=0; j<=winw-1; j++)
30440 : {
30441 0 : s->xxt.ptr.pp_double[i][j] = (double)(0);
30442 : }
30443 : }
30444 0 : ssa_updatexxtprepare(s, windowstotal, winw, s->memorylimit, _state);
30445 0 : for(i=0; i<=s->nsequences-1; i++)
30446 : {
30447 0 : for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state)-1; j++)
30448 : {
30449 0 : ssa_updatexxtsend(s, &s->sequencedata, s->sequenceidx.ptr.p_int[i]+j, &s->xxt, _state);
30450 : }
30451 : }
30452 0 : ssa_updatexxtfinalize(s, &s->xxt, _state);
30453 : }
30454 0 : if( s->algotype==3 )
30455 : {
30456 :
30457 : /*
30458 : * Compute X*X^T for real-time algorithm:
30459 : * * prepare queue of windows to merge into XXT
30460 : * * shuffle queue in order to avoid time-related biases in algorithm
30461 : * * dequeue first chunk
30462 : */
30463 0 : rmatrixsetlengthatleast(&s->xxt, winw, winw, _state);
30464 0 : windowstotal = 0;
30465 0 : for(i=0; i<=s->nsequences-1; i++)
30466 : {
30467 0 : windowstotal = windowstotal+ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state);
30468 : }
30469 0 : ae_assert(windowstotal>0, "SSA: integrity check in UpdateBasis() failed / 76t34", _state);
30470 0 : ivectorsetlengthatleast(&s->rtqueue, windowstotal, _state);
30471 0 : dstoffs = 0;
30472 0 : for(i=0; i<=s->nsequences-1; i++)
30473 : {
30474 0 : for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[i+1]-s->sequenceidx.ptr.p_int[i]-winw+1, 0, _state)-1; j++)
30475 : {
30476 0 : srcoffs = s->sequenceidx.ptr.p_int[i]+j;
30477 0 : s->rtqueue.ptr.p_int[dstoffs] = srcoffs;
30478 0 : inc(&dstoffs, _state);
30479 : }
30480 : }
30481 0 : ae_assert(dstoffs==windowstotal, "SSA: integrity check in UpdateBasis() failed / fh45f", _state);
30482 0 : if( s->rtpowerup>1 )
30483 : {
30484 :
30485 : /*
30486 : * Shuffle queue, it helps to avoid time-related bias in algorithm
30487 : * Shuffle the queue; it helps to avoid time-related bias in the algorithm
30488 0 : for(i=0; i<=windowstotal-1; i++)
30489 : {
30490 0 : j = i+hqrnduniformi(&s->rs, windowstotal-i, _state);
30491 0 : swapelementsi(&s->rtqueue, i, j, _state);
30492 : }
30493 : }
30494 0 : s->rtqueuecnt = windowstotal;
30495 0 : s->rtqueuechunk = 1;
30496 0 : s->rtqueuechunk = ae_maxint(s->rtqueuechunk, s->rtqueuecnt/s->rtpowerup, _state);
30497 0 : s->rtqueuechunk = ae_maxint(s->rtqueuechunk, 2*s->topk, _state);
30498 0 : ssa_realtimedequeue(s, 0.0, ae_minint(s->rtqueuechunk, s->rtqueuecnt, _state), _state);
30499 : }
30500 : }
30501 :
30502 : /*
30503 : * Handle possible updates for XXT:
30504 : * * check that append involves either last point of last sequence,
30505 : * or entire last sequence
30506 : * * if last sequence is shorter than window width, perform quick exit -
30507 : * we have nothing to update - no windows to insert into XXT
30508 : * * update XXT
30509 : */
30510 0 : if( appendlen>0 )
30511 : {
30512 0 : ae_assert(s->arebasisandsolvervalid, "SSA: integrity check failed / 5gvz3", _state);
30513 0 : ae_assert(s->nsequences>=1, "SSA: integrity check failed / 658ev", _state);
30514 0 : ae_assert(appendlen==1||appendlen==s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, "SSA: integrity check failed / sd3g7", _state);
30515 0 : if( s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]<winw )
30516 : {
30517 :
30518 : /*
30519 : * Last sequence is too short, nothing to update
30520 : */
30521 0 : return;
30522 : }
30523 0 : if( s->algotype==2||s->algotype==3 )
30524 : {
30525 0 : if( appendlen>1 )
30526 : {
30527 :
30528 : /*
30529 : * Long append, use GEMM for updates
30530 : */
30531 0 : ssa_updatexxtprepare(s, appendlen, winw, s->memorylimit, _state);
30532 0 : for(j=0; j<=ae_maxint(s->sequenceidx.ptr.p_int[s->nsequences]-s->sequenceidx.ptr.p_int[s->nsequences-1]-winw+1, 0, _state)-1; j++)
30533 : {
30534 0 : ssa_updatexxtsend(s, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences-1]+j, &s->xxt, _state);
30535 : }
30536 0 : ssa_updatexxtfinalize(s, &s->xxt, _state);
30537 : }
30538 : else
30539 : {
30540 :
30541 : /*
30542 : * Just one element is added, use rank-1 update
30543 : */
30544 0 : rmatrixger(winw, winw, &s->xxt, 0, 0, 1.0, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-winw, &s->sequencedata, s->sequenceidx.ptr.p_int[s->nsequences]-winw, _state);
30545 : }
30546 : }
30547 : }
30548 :
30549 : /*
30550 : * Now, perform basis calculation - either full recalculation (AppendLen=0)
30551 : * or quick update (AppendLen>0).
30552 : */
30553 0 : if( s->algotype==1 )
30554 : {
30555 :
30556 : /*
30557 : * Precomputed basis
30558 : */
30559 0 : if( winw!=s->precomputedwidth )
30560 : {
30561 :
30562 : /*
30563 : * Window width has changed, reset basis to zeros
30564 : */
30565 0 : s->nbasis = 1;
30566 0 : rmatrixsetlengthatleast(&s->basis, winw, 1, _state);
30567 0 : rvectorsetlengthatleast(&s->sv, 1, _state);
30568 0 : for(i=0; i<=winw-1; i++)
30569 : {
30570 0 : s->basis.ptr.pp_double[i][0] = 0.0;
30571 : }
30572 0 : s->sv.ptr.p_double[0] = 0.0;
30573 : }
30574 : else
30575 : {
30576 :
30577 : /*
30578 : * OK, use precomputed basis
30579 : */
30580 0 : s->nbasis = s->precomputednbasis;
30581 0 : rmatrixsetlengthatleast(&s->basis, winw, s->nbasis, _state);
30582 0 : rvectorsetlengthatleast(&s->sv, s->nbasis, _state);
30583 0 : for(j=0; j<=s->nbasis-1; j++)
30584 : {
30585 0 : s->sv.ptr.p_double[j] = 0.0;
30586 0 : for(i=0; i<=winw-1; i++)
30587 : {
30588 0 : s->basis.ptr.pp_double[i][j] = s->precomputedbasis.ptr.pp_double[i][j];
30589 : }
30590 : }
30591 : }
30592 0 : rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
30593 0 : rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
30594 : }
30595 : else
30596 : {
30597 0 : if( s->algotype==2 )
30598 : {
30599 :
30600 : /*
30601 : * Direct top-K algorithm
30602 : *
30603 : * Calculate eigenvectors with SMatrixEVD(), reorder by descending
30604 : * Calculate eigenvectors with SMatrixEVD(), reorder them by
30605 : * descending magnitudes.
30606 : * Update is performed for invalid basis or for non-zero UpdateIts.
30607 : */
30608 0 : needevd = !s->arebasisandsolvervalid;
30609 0 : needevd = needevd||ae_fp_greater_eq(updateits,(double)(1));
30610 0 : needevd = needevd||ae_fp_less(hqrnduniformr(&s->rs, _state),updateits-ae_ifloor(updateits, _state));
30611 0 : if( needevd )
30612 : {
30613 0 : inc(&s->dbgcntevd, _state);
30614 0 : s->nbasis = ae_minint(winw, s->topk, _state);
30615 0 : if( !smatrixevd(&s->xxt, winw, 1, ae_true, &s->sv, &s->basis, _state) )
30616 : {
30617 0 : ae_assert(ae_false, "SSA: SMatrixEVD failed", _state);
30618 : }
30619 0 : for(i=0; i<=winw-1; i++)
30620 : {
30621 0 : k = winw-1-i;
30622 0 : if( i>=k )
30623 : {
30624 0 : break;
30625 : }
30626 0 : v = s->sv.ptr.p_double[i];
30627 0 : s->sv.ptr.p_double[i] = s->sv.ptr.p_double[k];
30628 0 : s->sv.ptr.p_double[k] = v;
30629 0 : for(j=0; j<=winw-1; j++)
30630 : {
30631 0 : v = s->basis.ptr.pp_double[j][i];
30632 0 : s->basis.ptr.pp_double[j][i] = s->basis.ptr.pp_double[j][k];
30633 0 : s->basis.ptr.pp_double[j][k] = v;
30634 : }
30635 : }
30636 0 : for(i=0; i<=s->nbasis-1; i++)
30637 : {
30638 0 : s->sv.ptr.p_double[i] = ae_sqrt(ae_maxreal(s->sv.ptr.p_double[i], 0.0, _state), _state);
30639 : }
30640 0 : rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
30641 0 : rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
30642 : }
30643 : }
30644 : else
30645 : {
30646 0 : if( s->algotype==3 )
30647 : {
30648 :
30649 : /*
30650 : * Real-time top-K.
30651 : *
30652 : * Determine actual number of basis components, prepare subspace
30653 : * solver (either create from scratch or reuse).
30654 : *
30655 : * Update is always performed for invalid basis; for a valid basis
30656 : * it is performed with probability UpdateIts.
30657 : */
30658 0 : if( s->rtpowerup==1 )
30659 : {
30660 0 : subspaceits = s->defaultsubspaceits;
30661 : }
30662 : else
30663 : {
30664 0 : subspaceits = 3;
30665 : }
30666 0 : if( appendlen>0 )
30667 : {
30668 0 : ae_assert(s->arebasisandsolvervalid, "SSA: integrity check in UpdateBasis() failed / srg6f", _state);
30669 0 : ae_assert(ae_fp_greater_eq(updateits,(double)(0)), "SSA: integrity check in UpdateBasis() failed / srg4f", _state);
30670 0 : subspaceits = ae_ifloor(updateits, _state);
30671 0 : if( ae_fp_less(hqrnduniformr(&s->rs, _state),updateits-ae_ifloor(updateits, _state)) )
30672 : {
30673 0 : inc(&subspaceits, _state);
30674 : }
30675 0 : ae_assert(subspaceits>=0, "SSA: integrity check in UpdateBasis() failed / srg9f", _state);
30676 : }
30677 :
30678 : /*
30679 : * Dequeue pending dataset and merge it into XXT.
30680 : *
30681 : * Dequeuing is done only for appends, and only when we have
30682 : * non-empty queue.
30683 : */
30684 0 : if( appendlen>0&&s->rtqueuecnt>0 )
30685 : {
30686 0 : ssa_realtimedequeue(s, 1.0, ae_minint(s->rtqueuechunk, s->rtqueuecnt, _state), _state);
30687 : }
30688 :
30689 : /*
30690 : * Now, proceed to solver
30691 : */
30692 0 : if( subspaceits>0 )
30693 : {
30694 0 : if( appendlen==0 )
30695 : {
30696 0 : s->nbasis = ae_minint(winw, s->topk, _state);
30697 0 : eigsubspacecreatebuf(winw, s->nbasis, &s->solver, _state);
30698 : }
30699 : else
30700 : {
30701 0 : eigsubspacesetwarmstart(&s->solver, ae_true, _state);
30702 : }
30703 0 : eigsubspacesetcond(&s->solver, 0.0, subspaceits, _state);
30704 :
30705 : /*
30706 : * Perform initial basis estimation
30707 : */
30708 0 : inc(&s->dbgcntevd, _state);
30709 0 : eigsubspaceoocstart(&s->solver, 0, _state);
30710 0 : while(eigsubspaceooccontinue(&s->solver, _state))
30711 : {
30712 0 : eigsubspaceoocgetrequestinfo(&s->solver, &requesttype, &requestsize, _state);
30713 0 : ae_assert(requesttype==0, "SSA: integrity check in UpdateBasis() failed / 346372", _state);
30714 0 : rmatrixgemm(winw, requestsize, winw, 1.0, &s->xxt, 0, 0, 0, &s->solver.x, 0, 0, 0, 0.0, &s->solver.ax, 0, 0, _state);
30715 : }
30716 0 : eigsubspaceoocstop(&s->solver, &s->sv, &s->basis, &s->solverrep, _state);
30717 0 : for(i=0; i<=s->nbasis-1; i++)
30718 : {
30719 0 : s->sv.ptr.p_double[i] = ae_sqrt(ae_maxreal(s->sv.ptr.p_double[i], 0.0, _state), _state);
30720 : }
30721 0 : rmatrixsetlengthatleast(&s->basist, s->nbasis, winw, _state);
30722 0 : rmatrixtranspose(winw, s->nbasis, &s->basis, 0, 0, &s->basist, 0, 0, _state);
30723 : }
30724 : }
30725 : else
30726 : {
30727 0 : ae_assert(ae_false, "SSA: integrity check in UpdateBasis() failed / dfgs34", _state);
30728 : }
30729 : }
30730 : }
30731 :
30732 : /*
30733 : * Update recurrent relation
30734 : */
30735 0 : rvectorsetlengthatleast(&s->forecasta, ae_maxint(winw-1, 1, _state), _state);
30736 0 : degeneraterecurrence = ae_false;
30737 0 : if( winw>1 )
30738 : {
30739 :
30740 : /*
30741 : * Non-degenerate case
30742 : */
30743 0 : rvectorsetlengthatleast(&s->tmp0, s->nbasis, _state);
30744 0 : nu2 = 0.0;
30745 0 : for(i=0; i<=s->nbasis-1; i++)
30746 : {
30747 0 : v = s->basist.ptr.pp_double[i][winw-1];
30748 0 : s->tmp0.ptr.p_double[i] = v;
30749 0 : nu2 = nu2+v*v;
30750 : }
30751 0 : if( ae_fp_less(nu2,1-1000*ae_machineepsilon) )
30752 : {
30753 0 : rmatrixgemv(winw-1, s->nbasis, 1/(1-nu2), &s->basist, 0, 0, 1, &s->tmp0, 0, 0.0, &s->forecasta, 0, _state);
30754 : }
30755 : else
30756 : {
30757 0 : degeneraterecurrence = ae_true;
30758 : }
30759 : }
30760 : else
30761 : {
30762 0 : degeneraterecurrence = ae_true;
30763 : }
30764 0 : if( degeneraterecurrence )
30765 : {
30766 0 : for(i=0; i<=ae_maxint(winw-1, 1, _state)-1; i++)
30767 : {
30768 0 : s->forecasta.ptr.p_double[i] = 0.0;
30769 : }
30770 0 : s->forecasta.ptr.p_double[ae_maxint(winw-1, 1, _state)-1] = 1.0;
30771 : }
30772 :
30773 : /*
30774 : * Set validity flag
30775 : */
30776 0 : s->arebasisandsolvervalid = ae_true;
30777 : }
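
A sketch of the append protocol described in the header comment above, as
the internal call sequence which a public append wrapper (such as
ssaappendpointandupdate() in the C++ interface) would presumably follow;
illustrative pseudocode against internal state, not library code. Note also
that the recurrence coefficients computed at the end of this function follow
the standard SSA formula A = B'*Pi/(1-Nu2), where Pi holds the last
component of each basis vector, B' is the basis without its last row, and
Nu2=|Pi|^2 (see the GEMV call above).

    /* step 1: make sure a valid basis exists (full evaluation if needed) */
    // ssa_updatebasis(s, 0, 0.0, _state);
    /* step 2: append one point to s->sequencedata / s->sequenceidx       */
    /* step 3: incremental update - rank-1 XXT update plus UpdateIts
       subspace iterations; the fractional part of UpdateIts is the
       probability of one extra iteration                                 */
    // ssa_updatebasis(s, 1, updateits, _state);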
30778 :
30779 :
30780 : /*************************************************************************
30781 : This function performs analysis using current basis. It assumes and checks
30782 : that validity flag AreBasisAndSolverValid is set.
30783 :
30784 : INPUT PARAMETERS:
30785 : S - model
30786 : Data - array which holds data in elements [I0,I1):
30787 : * right bound is not included.
30788 : * I1-I0>=WindowWidth (assertion is performed).
30789 : Trend - preallocated output array, large enough
30790 : Noise - preallocated output array, large enough
30791 : Offs - offset in Trend/Noise where result is stored;
30792 : I1-I0 elements are written starting at offset
30793 : Offs.
30794 :
30795 : OUTPUT PARAMETERS:
30796 : Trend, Noise - processing results
30797 :
30798 :
30799 : -- ALGLIB --
30800 : Copyright 30.10.2017 by Bochkanov Sergey
30801 : *************************************************************************/
30802 0 : static void ssa_analyzesequence(ssamodel* s,
30803 : /* Real */ ae_vector* data,
30804 : ae_int_t i0,
30805 : ae_int_t i1,
30806 : /* Real */ ae_vector* trend,
30807 : /* Real */ ae_vector* noise,
30808 : ae_int_t offs,
30809 : ae_state *_state)
30810 : {
30811 : ae_int_t winw;
30812 : ae_int_t nwindows;
30813 : ae_int_t i;
30814 : ae_int_t j;
30815 : ae_int_t k;
30816 : ae_int_t cnt;
30817 : ae_int_t batchstart;
30818 : ae_int_t batchlimit;
30819 : ae_int_t batchsize;
30820 :
30821 :
30822 0 : ae_assert(s->arebasisandsolvervalid, "AnalyzeSequence: integrity check failed / d84sz0", _state);
30823 0 : ae_assert(i1-i0>=s->windowwidth, "AnalyzeSequence: integrity check failed / d84sz1", _state);
30824 0 : ae_assert(s->nbasis>=1, "AnalyzeSequence: integrity check failed / d84sz2", _state);
30825 0 : nwindows = i1-i0-s->windowwidth+1;
30826 0 : winw = s->windowwidth;
30827 0 : batchlimit = ae_maxint(nwindows, 1, _state);
30828 0 : if( s->memorylimit>0 )
30829 : {
30830 0 : batchlimit = ae_minint(batchlimit, ae_maxint(s->memorylimit/winw, 4*winw, _state), _state);
30831 : }
30832 :
30833 : /*
30834 : * Zero-initialize trend and counts
30835 : */
30836 0 : cnt = i1-i0;
30837 0 : ivectorsetlengthatleast(&s->aseqcounts, cnt, _state);
30838 0 : for(i=0; i<=cnt-1; i++)
30839 : {
30840 0 : s->aseqcounts.ptr.p_int[i] = 0;
30841 0 : trend->ptr.p_double[offs+i] = 0.0;
30842 : }
30843 :
30844 : /*
30845 : * Reset temporaries if algorithm settings changed since last round
30846 : */
30847 0 : if( s->aseqtrajectory.cols!=winw )
30848 : {
30849 0 : ae_matrix_set_length(&s->aseqtrajectory, 0, 0, _state);
30850 : }
30851 0 : if( s->aseqtbproduct.cols!=s->nbasis )
30852 : {
30853 0 : ae_matrix_set_length(&s->aseqtbproduct, 0, 0, _state);
30854 : }
30855 :
30856 : /*
30857 : * Perform batch processing
30858 : */
30859 0 : rmatrixsetlengthatleast(&s->aseqtrajectory, batchlimit, winw, _state);
30860 0 : rmatrixsetlengthatleast(&s->aseqtbproduct, batchlimit, s->nbasis, _state);
30861 0 : batchsize = 0;
30862 0 : batchstart = offs;
30863 0 : for(i=0; i<=nwindows-1; i++)
30864 : {
30865 :
30866 : /*
30867 : * Enqueue next row of trajectory matrix
30868 : */
30869 0 : if( batchsize==0 )
30870 : {
30871 0 : batchstart = i;
30872 : }
30873 0 : for(j=0; j<=winw-1; j++)
30874 : {
30875 0 : s->aseqtrajectory.ptr.pp_double[batchsize][j] = data->ptr.p_double[i0+i+j];
30876 : }
30877 0 : inc(&batchsize, _state);
30878 :
30879 : /*
30880 : * Process batch
30881 : */
30882 0 : if( batchsize==batchlimit||i==nwindows-1 )
30883 : {
30884 :
30885 : /*
30886 : * Project onto basis
30887 : */
30888 0 : rmatrixgemm(batchsize, s->nbasis, winw, 1.0, &s->aseqtrajectory, 0, 0, 0, &s->basist, 0, 0, 1, 0.0, &s->aseqtbproduct, 0, 0, _state);
30889 0 : rmatrixgemm(batchsize, winw, s->nbasis, 1.0, &s->aseqtbproduct, 0, 0, 0, &s->basist, 0, 0, 0, 0.0, &s->aseqtrajectory, 0, 0, _state);
30890 :
30891 : /*
30892 : * Hankelize
30893 : */
30894 0 : for(k=0; k<=batchsize-1; k++)
30895 : {
30896 0 : for(j=0; j<=winw-1; j++)
30897 : {
30898 0 : trend->ptr.p_double[offs+batchstart+k+j] = trend->ptr.p_double[offs+batchstart+k+j]+s->aseqtrajectory.ptr.pp_double[k][j];
30899 0 : s->aseqcounts.ptr.p_int[batchstart+k+j] = s->aseqcounts.ptr.p_int[batchstart+k+j]+1;
30900 : }
30901 : }
30902 :
30903 : /*
30904 : * Reset batch size
30905 : */
30906 0 : batchsize = 0;
30907 : }
30908 : }
30909 0 : for(i=0; i<=cnt-1; i++)
30910 : {
30911 0 : trend->ptr.p_double[offs+i] = trend->ptr.p_double[offs+i]/s->aseqcounts.ptr.p_int[i];
30912 : }
30913 :
30914 : /*
30915 : * Output noise
30916 : */
30917 0 : for(i=0; i<=cnt-1; i++)
30918 : {
30919 0 : noise->ptr.p_double[offs+i] = data->ptr.p_double[i0+i]-trend->ptr.p_double[offs+i];
30920 : }
30921 0 : }
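
/*************************************************************************
Illustration (not part of ALGLIB): the batch loop above reconstructs the
trend by "hankelization", i.e. diagonal averaging - tick T receives one
contribution from every sliding window covering it and is then divided by
the number of contributions (S.ASeqCounts). A minimal standalone C sketch
with hypothetical names, assuming a dense reconstructed trajectory matrix
R[NWindows][WinW] stored row by row:

    #include <stdlib.h>

    static void hankelize(const double *r, int nwindows, int winw,
                          double *trend, int cnt)
    {
        int i, j;
        int *counts = (int*)calloc((size_t)cnt, sizeof(int));
        for(i=0; i<cnt; i++)
            trend[i] = 0.0;
        for(i=0; i<nwindows; i++)       /* window starting at tick i */
            for(j=0; j<winw; j++)       /* R[i][j] covers tick i+j   */
            {
                trend[i+j] += r[i*winw+j];
                counts[i+j]++;
            }
        for(i=0; i<cnt; i++)            /* diagonal averaging        */
            trend[i] /= counts[i];
        free(counts);
    }

Here cnt = nwindows+winw-1, which matches CNT = I1-I0 above; the
production code additionally splits the rows into memory-bounded batches.
*************************************************************************/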
30922 :
30923 :
30924 : /*************************************************************************
30925 : This function performs averaged forecasting. It assumes that basis is
30926 : already built, everything is valid and checked. See comments on similar
30927 : public functions to find out more about averaged predictions.
30928 :
30929 : INPUT PARAMETERS:
30930 : S - model
30931 : Data - array which holds data in elements [I0,I1):
30932 : * right bound is not included.
30933 : * I1-I0>=WindowWidth (assertion is performed).
30934 : M - number of sliding windows to combine, M>=1. If
30935 : your dataset has fewer than M sliding windows, this
30936 : parameter is silently reduced.
30937 : ForecastLen - number of ticks to predict, ForecastLen>=1
30938 : Trend - preallocated output array, large enough
30939 : Offs - offset in Trend where result is stored;
30940 : ForecastLen elements are written starting at
30941 : offset Offs.
30942 :
30943 : OUTPUT PARAMETERS:
30944 : Trend - array[ForecastLen], forecasted trend
30945 :
30946 : -- ALGLIB --
30947 : Copyright 30.10.2017 by Bochkanov Sergey
30948 : *************************************************************************/
30949 0 : static void ssa_forecastavgsequence(ssamodel* s,
30950 : /* Real */ ae_vector* data,
30951 : ae_int_t i0,
30952 : ae_int_t i1,
30953 : ae_int_t m,
30954 : ae_int_t forecastlen,
30955 : ae_bool smooth,
30956 : /* Real */ ae_vector* trend,
30957 : ae_int_t offs,
30958 : ae_state *_state)
30959 : {
30960 : ae_int_t i;
30961 : ae_int_t j;
30962 : ae_int_t k;
30963 : ae_int_t winw;
30964 :
30965 :
30966 0 : ae_assert(s->arebasisandsolvervalid, "ForecastAvgSequence: integrity check failed / d84sz0", _state);
30967 0 : ae_assert(i1-i0-s->windowwidth+1>=m, "ForecastAvgSequence: integrity check failed / d84sz1", _state);
30968 0 : ae_assert(s->nbasis>=1, "ForecastAvgSequence: integrity check failed / d84sz2", _state);
30969 0 : ae_assert(s->windowwidth>=2, "ForecastAvgSequence: integrity check failed / 5tgdg5", _state);
30970 0 : ae_assert(s->windowwidth>s->nbasis, "ForecastAvgSequence: integrity check failed / d5g56w", _state);
30971 0 : winw = s->windowwidth;
30972 :
30973 : /*
30974 : * Prepare M synchronized predictions for the last known tick
30975 : * (last one is an actual value of the trend, previous M-1 predictions
30976 : * are predictions from differently positioned sliding windows).
30977 : */
30978 0 : rmatrixsetlengthatleast(&s->fctrendm, m, winw, _state);
30979 0 : rvectorsetlengthatleast(&s->tmp0, ae_maxint(m, s->nbasis, _state), _state);
30980 0 : rvectorsetlengthatleast(&s->tmp1, winw, _state);
30981 0 : for(k=0; k<=m-1; k++)
30982 : {
30983 :
30984 : /*
30985 : * Perform prediction for rows [0,K-1]
30986 : */
30987 0 : rmatrixgemv(k, winw-1, 1.0, &s->fctrendm, 0, 1, 0, &s->forecasta, 0, 0.0, &s->tmp0, 0, _state);
30988 0 : for(i=0; i<=k-1; i++)
30989 : {
30990 0 : for(j=1; j<=winw-1; j++)
30991 : {
30992 0 : s->fctrendm.ptr.pp_double[i][j-1] = s->fctrendm.ptr.pp_double[i][j];
30993 : }
30994 0 : s->fctrendm.ptr.pp_double[i][winw-1] = s->tmp0.ptr.p_double[i];
30995 : }
30996 :
30997 : /*
30998 : * Perform trend extraction for row K, add it to dataset
30999 : */
31000 0 : if( smooth )
31001 : {
31002 0 : rmatrixgemv(s->nbasis, winw, 1.0, &s->basist, 0, 0, 0, data, i1-winw-(m-1-k), 0.0, &s->tmp0, 0, _state);
31003 0 : rmatrixgemv(s->windowwidth, s->nbasis, 1.0, &s->basis, 0, 0, 0, &s->tmp0, 0, 0.0, &s->tmp1, 0, _state);
31004 0 : for(j=0; j<=winw-1; j++)
31005 : {
31006 0 : s->fctrendm.ptr.pp_double[k][j] = s->tmp1.ptr.p_double[j];
31007 : }
31008 : }
31009 : else
31010 : {
31011 0 : for(j=0; j<=winw-1; j++)
31012 : {
31013 0 : s->fctrendm.ptr.pp_double[k][j] = data->ptr.p_double[i1-winw-(m-1-k)+j];
31014 : }
31015 : }
31016 : }
31017 :
31018 : /*
31019 : * Now we have M synchronized predictions of the sequence state at the last
31020 : * known moment (last "prediction" is just a copy of the trend). Let's start
31021 : * batch prediction!
31022 : */
31023 0 : for(k=0; k<=forecastlen-1; k++)
31024 : {
31025 0 : rmatrixgemv(m, winw-1, 1.0, &s->fctrendm, 0, 1, 0, &s->forecasta, 0, 0.0, &s->tmp0, 0, _state);
31026 0 : trend->ptr.p_double[offs+k] = 0.0;
31027 0 : for(i=0; i<=m-1; i++)
31028 : {
31029 0 : for(j=1; j<=winw-1; j++)
31030 : {
31031 0 : s->fctrendm.ptr.pp_double[i][j-1] = s->fctrendm.ptr.pp_double[i][j];
31032 : }
31033 0 : s->fctrendm.ptr.pp_double[i][winw-1] = s->tmp0.ptr.p_double[i];
31034 0 : trend->ptr.p_double[offs+k] = trend->ptr.p_double[offs+k]+s->tmp0.ptr.p_double[i];
31035 : }
31036 0 : trend->ptr.p_double[offs+k] = trend->ptr.p_double[offs+k]/m;
31037 : }
31038 0 : }
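
/*************************************************************************
Illustration (not part of ALGLIB): each forecast step above applies a
linear recurrence with coefficients ForecastA[0..WinW-2] to the last
WinW-1 ticks of a window, then shifts the window by one tick. A minimal
standalone C sketch with hypothetical names (w is one row of S.FCTrendM):

    /* advance one window by one tick, return the predicted value */
    static double lrr_step(double *w, const double *a, int winw)
    {
        int j;
        double next = 0.0;
        for(j=1; j<winw; j++)       /* recurrence on the last WinW-1 ticks */
            next += a[j-1]*w[j];
        for(j=1; j<winw; j++)       /* shift window left by one tick       */
            w[j-1] = w[j];
        w[winw-1] = next;           /* append the prediction               */
        return next;
    }

The averaged forecast written to Trend[Offs+K] is simply the mean of
lrr_step() outputs over the M synchronized windows, exactly as in the
K-loop above.
*************************************************************************/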
31039 :
31040 :
31041 : /*************************************************************************
31042 : This function extracts updates from real-time queue and applies them to
31043 : the S.XXT matrix. XXT is premultiplied by Beta, which can be 0.0 for
31044 : initial creation, 1.0 for subsequent updates, or a value in (0,1) for
31045 : updates with decay.
31046 :
31047 : INPUT PARAMETERS:
31048 : S - model
31049 : Beta - >=0, coefficient to premultiply XXT
31050 : Cnt - 0<Cnt<=S.RTQueueCnt, number of updates to extract
31051 : from the end of the queue
31052 :
31053 : OUTPUT PARAMETERS:
31054 : S - S.XXT updated, S.RTQueueCnt decreased
31055 :
31056 : -- ALGLIB --
31057 : Copyright 30.10.2017 by Bochkanov Sergey
31058 : *************************************************************************/
31059 0 : static void ssa_realtimedequeue(ssamodel* s,
31060 : double beta,
31061 : ae_int_t cnt,
31062 : ae_state *_state)
31063 : {
31064 : ae_int_t i;
31065 : ae_int_t j;
31066 : ae_int_t winw;
31067 :
31068 :
31069 0 : ae_assert(cnt>0, "SSA: RealTimeDequeue() integrity check failed / 43tdv", _state);
31070 0 : ae_assert(ae_isfinite(beta, _state)&&ae_fp_greater_eq(beta,(double)(0)), "SSA: RealTimeDequeue() integrity check failed / 5gdg6", _state);
31071 0 : ae_assert(cnt<=s->rtqueuecnt, "SSA: RealTimeDequeue() integrity check failed / 547yh", _state);
31072 0 : ae_assert(s->xxt.cols>=s->windowwidth, "SSA: RealTimeDequeue() integrity check failed / 54bf4", _state);
31073 0 : ae_assert(s->xxt.rows>=s->windowwidth, "SSA: RealTimeDequeue() integrity check failed / 9gdfn", _state);
31074 0 : winw = s->windowwidth;
31075 :
31076 : /*
31077 : * Premultiply XXT by Beta
31078 : */
31079 0 : if( ae_fp_neq(beta,(double)(0)) )
31080 : {
31081 0 : for(i=0; i<=winw-1; i++)
31082 : {
31083 0 : for(j=0; j<=winw-1; j++)
31084 : {
31085 0 : s->xxt.ptr.pp_double[i][j] = s->xxt.ptr.pp_double[i][j]*beta;
31086 : }
31087 : }
31088 : }
31089 : else
31090 : {
31091 0 : for(i=0; i<=winw-1; i++)
31092 : {
31093 0 : for(j=0; j<=winw-1; j++)
31094 : {
31095 0 : s->xxt.ptr.pp_double[i][j] = (double)(0);
31096 : }
31097 : }
31098 : }
31099 :
31100 : /*
31101 : * Dequeue
31102 : */
31103 0 : ssa_updatexxtprepare(s, cnt, winw, s->memorylimit, _state);
31104 0 : for(i=0; i<=cnt-1; i++)
31105 : {
31106 0 : ssa_updatexxtsend(s, &s->sequencedata, s->rtqueue.ptr.p_int[s->rtqueuecnt-1], &s->xxt, _state);
31107 0 : dec(&s->rtqueuecnt, _state);
31108 : }
31109 0 : ssa_updatexxtfinalize(s, &s->xxt, _state);
31110 0 : }
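
/*************************************************************************
Illustration (not part of ALGLIB): stripped of the batching machinery,
this function computes

    XXT := Beta*XXT + sum_k u_k*u_k'

where each u_k is a WindowWidth-sized window popped from the real-time
queue. A naive standalone C sketch with hypothetical names:

    static void decayed_gram_update(double *xxt, int w, double beta,
                                    const double *u, int nupdates)
    {
        int i, j, k;
        for(i=0; i<w*w; i++)
            xxt[i] *= beta;                      /* decay, or reset if beta=0 */
        for(k=0; k<nupdates; k++)                /* rank-1 accumulation       */
            for(i=0; i<w; i++)
                for(j=0; j<w; j++)
                    xxt[i*w+j] += u[k*w+i]*u[k*w+j];
    }

The production code replaces the rank-1 loops with batched SYRK() calls,
see ssa_updatexxtprepare/send/finalize below.
*************************************************************************/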
31111 :
31112 :
31113 : /*************************************************************************
31114 : This function prepares batch buffer for XXT update. The idea is that we
31115 : send a stream of "XXT += u*u'" updates, and we want to package them into
31116 : one big matrix update U*U', applied with the SYRK() kernel, but U can
31117 : consume too much memory, so we transparently divide it into a few
31118 : smaller chunks.
31119 :
31120 : This set of functions solves this problem:
31121 : * UpdateXXTPrepare() prepares temporary buffers
31122 : * UpdateXXTSend() sends next u to the buffer, possibly initiating next SYRK()
31123 : * UpdateXXTFinalize() performs last SYRK() update
31124 :
31125 : INPUT PARAMETERS:
31126 : S - model, only fields with UX prefix are used
31127 : UpdateSize - number of updates
31128 : WindowWidth - window width, >0
31129 : MemoryLimit - memory limit, non-positive value means no limit
31130 :
31131 : OUTPUT PARAMETERS:
31132 : S - UX temporaries updated
31133 :
31134 : -- ALGLIB --
31135 : Copyright 20.12.2017 by Bochkanov Sergey
31136 : *************************************************************************/
31137 0 : static void ssa_updatexxtprepare(ssamodel* s,
31138 : ae_int_t updatesize,
31139 : ae_int_t windowwidth,
31140 : ae_int_t memorylimit,
31141 : ae_state *_state)
31142 : {
31143 :
31144 :
31145 0 : ae_assert(windowwidth>0, "UpdateXXTPrepare: WinW<=0", _state);
31146 0 : s->uxbatchlimit = ae_maxint(updatesize, 1, _state);
31147 0 : if( memorylimit>0 )
31148 : {
31149 0 : s->uxbatchlimit = ae_minint(s->uxbatchlimit, ae_maxint(memorylimit/windowwidth, 4*windowwidth, _state), _state);
31150 : }
31151 0 : s->uxbatchwidth = windowwidth;
31152 0 : s->uxbatchsize = 0;
31153 0 : if( s->uxbatch.cols!=windowwidth )
31154 : {
31155 0 : ae_matrix_set_length(&s->uxbatch, 0, 0, _state);
31156 : }
31157 0 : rmatrixsetlengthatleast(&s->uxbatch, s->uxbatchlimit, windowwidth, _state);
31158 0 : }
31159 :
31160 :
31161 : /*************************************************************************
31162 : This function sends update u*u' to the batch buffer.
31163 :
31164 : INPUT PARAMETERS:
31165 : S - model, only fields with UX prefix are used
31166 : U - WindowWidth-sized update, starts at I0
31167 : I0 - starting position for update
31168 :
31169 : OUTPUT PARAMETERS:
31170 : S - UX temporaries updated
31171 : XXT - array[WindowWidth,WindowWidth], matrix being
31172 : updated. All intermediate updates are
31173 : applied to the upper triangle only.
31174 :
31175 : -- ALGLIB --
31176 : Copyright 20.12.2017 by Bochkanov Sergey
31177 : *************************************************************************/
31178 0 : static void ssa_updatexxtsend(ssamodel* s,
31179 : /* Real */ ae_vector* u,
31180 : ae_int_t i0,
31181 : /* Real */ ae_matrix* xxt,
31182 : ae_state *_state)
31183 : {
31184 :
31185 :
31186 0 : ae_assert(i0+s->uxbatchwidth-1<u->cnt, "UpdateXXTSend: incorrect U size", _state);
31187 0 : ae_assert(s->uxbatchsize>=0, "UpdateXXTSend: integrity check failure", _state);
31188 0 : ae_assert(s->uxbatchsize<=s->uxbatchlimit, "UpdateXXTSend: integrity check failure", _state);
31189 0 : ae_assert(s->uxbatchlimit>=1, "UpdateXXTSend: integrity check failure", _state);
31190 :
31191 : /*
31192 : * Send pending batch if full
31193 : */
31194 0 : if( s->uxbatchsize==s->uxbatchlimit )
31195 : {
31196 0 : rmatrixsyrk(s->uxbatchwidth, s->uxbatchsize, 1.0, &s->uxbatch, 0, 0, 2, 1.0, xxt, 0, 0, ae_true, _state);
31197 0 : s->uxbatchsize = 0;
31198 : }
31199 :
31200 : /*
31201 : * Append update to batch
31202 : */
31203 0 : ae_v_move(&s->uxbatch.ptr.pp_double[s->uxbatchsize][0], 1, &u->ptr.p_double[i0], 1, ae_v_len(0,s->uxbatchwidth-1));
31204 0 : inc(&s->uxbatchsize, _state);
31205 0 : }
31206 :
31207 :
31208 : /*************************************************************************
31209 : This function finalizes batch buffer. Call it after the last update.
31210 :
31211 : INPUT PARAMETERS:
31212 : S - model, only fields with UX prefix are used
31213 :
31214 : OUTPUT PARAMETERS:
31215 : S - UX temporaries updated
31216 : XXT - array[WindowWidth,WindowWidth], updated with
31217 : all previous updates, both triangles of the
31218 : symmetric matrix are present.
31219 :
31220 : -- ALGLIB --
31221 : Copyright 20.12.2017 by Bochkanov Sergey
31222 : *************************************************************************/
31223 0 : static void ssa_updatexxtfinalize(ssamodel* s,
31224 : /* Real */ ae_matrix* xxt,
31225 : ae_state *_state)
31226 : {
31227 :
31228 :
31229 0 : ae_assert(s->uxbatchsize>=0, "UpdateXXTFinalize: integrity check failure", _state);
31230 0 : ae_assert(s->uxbatchsize<=s->uxbatchlimit, "UpdateXXTFinalize: integrity check failure", _state);
31231 0 : ae_assert(s->uxbatchlimit>=1, "UpdateXXTFinalize: integrity check failure", _state);
31232 0 : if( s->uxbatchsize>0 )
31233 : {
31234 0 : rmatrixsyrk(s->uxbatchwidth, s->uxbatchsize, 1.0, &s->uxbatch, 0, 0, 2, 1.0, xxt, 0, 0, ae_true, _state);
31235 0 : s->uxbatchsize = 0;
31236 : }
31237 0 : rmatrixenforcesymmetricity(xxt, s->uxbatchwidth, ae_true, _state);
31238 0 : }
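
/*************************************************************************
Illustration (not part of ALGLIB): the Prepare/Send/Finalize trio above
implements the following buffering pattern, sketched here in standalone C
with hypothetical names. batch_flush() plays the role of the rmatrixsyrk()
call with TransA=2, i.e. XXT(upper triangle) += U'*U for the buffered
rows U:

    typedef struct { double *rows; int width, size, limit; } batchbuf;

    /* XXT(upper) += U'*U over the buffered rows, then empty the buffer */
    static void batch_flush(batchbuf *b, double *xxt)
    {
        int i, j, k;
        for(i=0; i<b->width; i++)
            for(j=i; j<b->width; j++)
                for(k=0; k<b->size; k++)
                    xxt[i*b->width+j] += b->rows[k*b->width+i]*b->rows[k*b->width+j];
        b->size = 0;
    }

    /* enqueue one update u, flushing first if the buffer is full */
    static void batch_send(batchbuf *b, const double *u, double *xxt)
    {
        int j;
        if( b->size==b->limit )
            batch_flush(b, xxt);
        for(j=0; j<b->width; j++)
            b->rows[b->size*b->width+j] = u[j];
        b->size++;
    }

rows must provide limit*width doubles. Buffering keeps the memory used by
U bounded by Limit*Width while still letting the SYRK kernel operate on
large blocks; Finalize() flushes the tail and restores the lower triangle
by symmetry.
*************************************************************************/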
31239 :
31240 :
31241 0 : void _ssamodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
31242 : {
31243 0 : ssamodel *p = (ssamodel*)_p;
31244 0 : ae_touch_ptr((void*)p);
31245 0 : ae_vector_init(&p->sequenceidx, 0, DT_INT, _state, make_automatic);
31246 0 : ae_vector_init(&p->sequencedata, 0, DT_REAL, _state, make_automatic);
31247 0 : ae_matrix_init(&p->precomputedbasis, 0, 0, DT_REAL, _state, make_automatic);
31248 0 : ae_matrix_init(&p->basis, 0, 0, DT_REAL, _state, make_automatic);
31249 0 : ae_matrix_init(&p->basist, 0, 0, DT_REAL, _state, make_automatic);
31250 0 : ae_vector_init(&p->sv, 0, DT_REAL, _state, make_automatic);
31251 0 : ae_vector_init(&p->forecasta, 0, DT_REAL, _state, make_automatic);
31252 0 : _eigsubspacestate_init(&p->solver, _state, make_automatic);
31253 0 : ae_matrix_init(&p->xxt, 0, 0, DT_REAL, _state, make_automatic);
31254 0 : _hqrndstate_init(&p->rs, _state, make_automatic);
31255 0 : ae_vector_init(&p->rtqueue, 0, DT_INT, _state, make_automatic);
31256 0 : ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
31257 0 : ae_vector_init(&p->tmp1, 0, DT_REAL, _state, make_automatic);
31258 0 : _eigsubspacereport_init(&p->solverrep, _state, make_automatic);
31259 0 : ae_vector_init(&p->alongtrend, 0, DT_REAL, _state, make_automatic);
31260 0 : ae_vector_init(&p->alongnoise, 0, DT_REAL, _state, make_automatic);
31261 0 : ae_matrix_init(&p->aseqtrajectory, 0, 0, DT_REAL, _state, make_automatic);
31262 0 : ae_matrix_init(&p->aseqtbproduct, 0, 0, DT_REAL, _state, make_automatic);
31263 0 : ae_vector_init(&p->aseqcounts, 0, DT_INT, _state, make_automatic);
31264 0 : ae_vector_init(&p->fctrend, 0, DT_REAL, _state, make_automatic);
31265 0 : ae_vector_init(&p->fcnoise, 0, DT_REAL, _state, make_automatic);
31266 0 : ae_matrix_init(&p->fctrendm, 0, 0, DT_REAL, _state, make_automatic);
31267 0 : ae_matrix_init(&p->uxbatch, 0, 0, DT_REAL, _state, make_automatic);
31268 0 : }
31269 :
31270 :
31271 0 : void _ssamodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
31272 : {
31273 0 : ssamodel *dst = (ssamodel*)_dst;
31274 0 : ssamodel *src = (ssamodel*)_src;
31275 0 : dst->nsequences = src->nsequences;
31276 0 : ae_vector_init_copy(&dst->sequenceidx, &src->sequenceidx, _state, make_automatic);
31277 0 : ae_vector_init_copy(&dst->sequencedata, &src->sequencedata, _state, make_automatic);
31278 0 : dst->algotype = src->algotype;
31279 0 : dst->windowwidth = src->windowwidth;
31280 0 : dst->rtpowerup = src->rtpowerup;
31281 0 : dst->topk = src->topk;
31282 0 : dst->precomputedwidth = src->precomputedwidth;
31283 0 : dst->precomputednbasis = src->precomputednbasis;
31284 0 : ae_matrix_init_copy(&dst->precomputedbasis, &src->precomputedbasis, _state, make_automatic);
31285 0 : dst->defaultsubspaceits = src->defaultsubspaceits;
31286 0 : dst->memorylimit = src->memorylimit;
31287 0 : dst->arebasisandsolvervalid = src->arebasisandsolvervalid;
31288 0 : ae_matrix_init_copy(&dst->basis, &src->basis, _state, make_automatic);
31289 0 : ae_matrix_init_copy(&dst->basist, &src->basist, _state, make_automatic);
31290 0 : ae_vector_init_copy(&dst->sv, &src->sv, _state, make_automatic);
31291 0 : ae_vector_init_copy(&dst->forecasta, &src->forecasta, _state, make_automatic);
31292 0 : dst->nbasis = src->nbasis;
31293 0 : _eigsubspacestate_init_copy(&dst->solver, &src->solver, _state, make_automatic);
31294 0 : ae_matrix_init_copy(&dst->xxt, &src->xxt, _state, make_automatic);
31295 0 : _hqrndstate_init_copy(&dst->rs, &src->rs, _state, make_automatic);
31296 0 : dst->rngseed = src->rngseed;
31297 0 : ae_vector_init_copy(&dst->rtqueue, &src->rtqueue, _state, make_automatic);
31298 0 : dst->rtqueuecnt = src->rtqueuecnt;
31299 0 : dst->rtqueuechunk = src->rtqueuechunk;
31300 0 : dst->dbgcntevd = src->dbgcntevd;
31301 0 : ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
31302 0 : ae_vector_init_copy(&dst->tmp1, &src->tmp1, _state, make_automatic);
31303 0 : _eigsubspacereport_init_copy(&dst->solverrep, &src->solverrep, _state, make_automatic);
31304 0 : ae_vector_init_copy(&dst->alongtrend, &src->alongtrend, _state, make_automatic);
31305 0 : ae_vector_init_copy(&dst->alongnoise, &src->alongnoise, _state, make_automatic);
31306 0 : ae_matrix_init_copy(&dst->aseqtrajectory, &src->aseqtrajectory, _state, make_automatic);
31307 0 : ae_matrix_init_copy(&dst->aseqtbproduct, &src->aseqtbproduct, _state, make_automatic);
31308 0 : ae_vector_init_copy(&dst->aseqcounts, &src->aseqcounts, _state, make_automatic);
31309 0 : ae_vector_init_copy(&dst->fctrend, &src->fctrend, _state, make_automatic);
31310 0 : ae_vector_init_copy(&dst->fcnoise, &src->fcnoise, _state, make_automatic);
31311 0 : ae_matrix_init_copy(&dst->fctrendm, &src->fctrendm, _state, make_automatic);
31312 0 : ae_matrix_init_copy(&dst->uxbatch, &src->uxbatch, _state, make_automatic);
31313 0 : dst->uxbatchwidth = src->uxbatchwidth;
31314 0 : dst->uxbatchsize = src->uxbatchsize;
31315 0 : dst->uxbatchlimit = src->uxbatchlimit;
31316 0 : }
31317 :
31318 :
31319 0 : void _ssamodel_clear(void* _p)
31320 : {
31321 0 : ssamodel *p = (ssamodel*)_p;
31322 0 : ae_touch_ptr((void*)p);
31323 0 : ae_vector_clear(&p->sequenceidx);
31324 0 : ae_vector_clear(&p->sequencedata);
31325 0 : ae_matrix_clear(&p->precomputedbasis);
31326 0 : ae_matrix_clear(&p->basis);
31327 0 : ae_matrix_clear(&p->basist);
31328 0 : ae_vector_clear(&p->sv);
31329 0 : ae_vector_clear(&p->forecasta);
31330 0 : _eigsubspacestate_clear(&p->solver);
31331 0 : ae_matrix_clear(&p->xxt);
31332 0 : _hqrndstate_clear(&p->rs);
31333 0 : ae_vector_clear(&p->rtqueue);
31334 0 : ae_vector_clear(&p->tmp0);
31335 0 : ae_vector_clear(&p->tmp1);
31336 0 : _eigsubspacereport_clear(&p->solverrep);
31337 0 : ae_vector_clear(&p->alongtrend);
31338 0 : ae_vector_clear(&p->alongnoise);
31339 0 : ae_matrix_clear(&p->aseqtrajectory);
31340 0 : ae_matrix_clear(&p->aseqtbproduct);
31341 0 : ae_vector_clear(&p->aseqcounts);
31342 0 : ae_vector_clear(&p->fctrend);
31343 0 : ae_vector_clear(&p->fcnoise);
31344 0 : ae_matrix_clear(&p->fctrendm);
31345 0 : ae_matrix_clear(&p->uxbatch);
31346 0 : }
31347 :
31348 :
31349 0 : void _ssamodel_destroy(void* _p)
31350 : {
31351 0 : ssamodel *p = (ssamodel*)_p;
31352 0 : ae_touch_ptr((void*)p);
31353 0 : ae_vector_destroy(&p->sequenceidx);
31354 0 : ae_vector_destroy(&p->sequencedata);
31355 0 : ae_matrix_destroy(&p->precomputedbasis);
31356 0 : ae_matrix_destroy(&p->basis);
31357 0 : ae_matrix_destroy(&p->basist);
31358 0 : ae_vector_destroy(&p->sv);
31359 0 : ae_vector_destroy(&p->forecasta);
31360 0 : _eigsubspacestate_destroy(&p->solver);
31361 0 : ae_matrix_destroy(&p->xxt);
31362 0 : _hqrndstate_destroy(&p->rs);
31363 0 : ae_vector_destroy(&p->rtqueue);
31364 0 : ae_vector_destroy(&p->tmp0);
31365 0 : ae_vector_destroy(&p->tmp1);
31366 0 : _eigsubspacereport_destroy(&p->solverrep);
31367 0 : ae_vector_destroy(&p->alongtrend);
31368 0 : ae_vector_destroy(&p->alongnoise);
31369 0 : ae_matrix_destroy(&p->aseqtrajectory);
31370 0 : ae_matrix_destroy(&p->aseqtbproduct);
31371 0 : ae_vector_destroy(&p->aseqcounts);
31372 0 : ae_vector_destroy(&p->fctrend);
31373 0 : ae_vector_destroy(&p->fcnoise);
31374 0 : ae_matrix_destroy(&p->fctrendm);
31375 0 : ae_matrix_destroy(&p->uxbatch);
31376 0 : }
31377 :
31378 :
31379 : #endif
31380 : #if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
31381 :
31382 :
31383 : /*************************************************************************
31384 : Linear regression
31385 :
31386 : Subroutine builds model:
31387 :
31388 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)
31389 :
31390 : and returns the model in ALGLIB format, together with the covariance
31391 : matrix, training set errors (RMS, average, average relative) and a
31392 : leave-one-out cross-validation estimate of the generalization error.
31393 : The CV estimate is calculated using a fast O(NPoints*NVars) algorithm.
31394 :
31395 : When the covariance matrix is calculated, standard deviations of the
31396 : function values are assumed to be equal to the RMS error on the training set.
31397 :
31398 : INPUT PARAMETERS:
31399 : XY - training set, array [0..NPoints-1,0..NVars]:
31400 : * NVars columns - independent variables
31401 : * last column - dependent variable
31402 : NPoints - training set size, NPoints>NVars+1
31403 : NVars - number of independent variables
31404 :
31405 : OUTPUT PARAMETERS:
31406 : Info - return code:
31407 : * -255, in case of unknown internal error
31408 : * -4, if the internal SVD subroutine hasn't converged
31409 : * -1, if incorrect parameters were passed (NPoints<NVars+2 or NVars<1).
31410 : * 1, if subroutine successfully finished
31411 : LM - linear model in the ALGLIB format. Use subroutines of
31412 : this unit to work with the model.
31413 : AR - additional results
31414 :
31415 :
31416 : -- ALGLIB --
31417 : Copyright 02.08.2008 by Bochkanov Sergey
31418 : *************************************************************************/
31419 0 : void lrbuild(/* Real */ ae_matrix* xy,
31420 : ae_int_t npoints,
31421 : ae_int_t nvars,
31422 : ae_int_t* info,
31423 : linearmodel* lm,
31424 : lrreport* ar,
31425 : ae_state *_state)
31426 : {
31427 : ae_frame _frame_block;
31428 : ae_vector s;
31429 : ae_int_t i;
31430 : double sigma2;
31431 :
31432 0 : ae_frame_make(_state, &_frame_block);
31433 0 : memset(&s, 0, sizeof(s));
31434 0 : *info = 0;
31435 0 : _linearmodel_clear(lm);
31436 0 : _lrreport_clear(ar);
31437 0 : ae_vector_init(&s, 0, DT_REAL, _state, ae_true);
31438 :
31439 0 : if( npoints<=nvars+1||nvars<1 )
31440 : {
31441 0 : *info = -1;
31442 0 : ae_frame_leave(_state);
31443 0 : return;
31444 : }
31445 0 : ae_vector_set_length(&s, npoints-1+1, _state);
31446 0 : for(i=0; i<=npoints-1; i++)
31447 : {
31448 0 : s.ptr.p_double[i] = (double)(1);
31449 : }
31450 0 : lrbuilds(xy, &s, npoints, nvars, info, lm, ar, _state);
31451 0 : if( *info<0 )
31452 : {
31453 0 : ae_frame_leave(_state);
31454 0 : return;
31455 : }
31456 0 : sigma2 = ae_sqr(ar->rmserror, _state)*npoints/(npoints-nvars-1);
31457 0 : for(i=0; i<=nvars; i++)
31458 : {
31459 0 : ae_v_muld(&ar->c.ptr.pp_double[i][0], 1, ae_v_len(0,nvars), sigma2);
31460 : }
31461 0 : ae_frame_leave(_state);
31462 : }
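
/*************************************************************************
Usage sketch (hypothetical data, not part of ALGLIB's shipped examples):
a minimal call of LRBuild from the C core, assuming the environment
helpers used elsewhere in this unit (ae_state_init(), ae_matrix_init(),
_linearmodel_init(), _lrreport_init()). Builds a 2-variable model from
5 points:

    #include <stdio.h>
    #include "dataanalysis.h"

    void lrbuild_demo(void)
    {
        ae_state state;
        ae_matrix xy;
        linearmodel lm;
        lrreport rep;
        ae_int_t info, i;
        double data[5][3] = {                 /* columns: x0, x1, y */
            {1,1,3.1}, {2,1,4.0}, {3,2,6.2}, {4,3,8.1}, {5,5,11.0} };

        ae_state_init(&state);
        ae_matrix_init(&xy, 5, 3, DT_REAL, &state, ae_false);
        _linearmodel_init(&lm, &state, ae_false);
        _lrreport_init(&rep, &state, ae_false);
        for(i=0; i<5; i++)
        {
            xy.ptr.pp_double[i][0] = data[i][0];
            xy.ptr.pp_double[i][1] = data[i][1];
            xy.ptr.pp_double[i][2] = data[i][2];
        }
        lrbuild(&xy, 5, 2, &info, &lm, &rep, &state);
        if( info==1 )                         /* 1 = success        */
            printf("rms=%f cv-rms=%f\n", rep.rmserror, rep.cvrmserror);
    }

Note that NPoints must exceed NVars+1, otherwise Info=-1 is returned.
*************************************************************************/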
31463 :
31464 :
31465 : /*************************************************************************
31466 : Linear regression
31467 :
31468 : Variant of LRBuild which uses a vector of standard deviations (errors
31469 : in function values).
31470 :
31471 : INPUT PARAMETERS:
31472 : XY - training set, array [0..NPoints-1,0..NVars]:
31473 : * NVars columns - independent variables
31474 : * last column - dependent variable
31475 : S - standard deviations (errors in function values)
31476 : array[0..NPoints-1], S[i]>0.
31477 : NPoints - training set size, NPoints>NVars+1
31478 : NVars - number of independent variables
31479 :
31480 : OUTPUT PARAMETERS:
31481 : Info - return code:
31482 : * -255, in case of unknown internal error
31483 : * -4, if the internal SVD subroutine hasn't converged
31484 : * -1, if incorrect parameters were passed (NPoints<NVars+2 or NVars<1).
31485 : * -2, if S[I]<=0
31486 : * 1, if subroutine successfully finished
31487 : LM - linear model in the ALGLIB format. Use subroutines of
31488 : this unit to work with the model.
31489 : AR - additional results
31490 :
31491 :
31492 : -- ALGLIB --
31493 : Copyright 02.08.2008 by Bochkanov Sergey
31494 : *************************************************************************/
31495 0 : void lrbuilds(/* Real */ ae_matrix* xy,
31496 : /* Real */ ae_vector* s,
31497 : ae_int_t npoints,
31498 : ae_int_t nvars,
31499 : ae_int_t* info,
31500 : linearmodel* lm,
31501 : lrreport* ar,
31502 : ae_state *_state)
31503 : {
31504 : ae_frame _frame_block;
31505 : ae_matrix xyi;
31506 : ae_vector x;
31507 : ae_vector means;
31508 : ae_vector sigmas;
31509 : ae_int_t i;
31510 : ae_int_t j;
31511 : double v;
31512 : ae_int_t offs;
31513 : double mean;
31514 : double variance;
31515 : double skewness;
31516 : double kurtosis;
31517 :
31518 0 : ae_frame_make(_state, &_frame_block);
31519 0 : memset(&xyi, 0, sizeof(xyi));
31520 0 : memset(&x, 0, sizeof(x));
31521 0 : memset(&means, 0, sizeof(means));
31522 0 : memset(&sigmas, 0, sizeof(sigmas));
31523 0 : *info = 0;
31524 0 : _linearmodel_clear(lm);
31525 0 : _lrreport_clear(ar);
31526 0 : ae_matrix_init(&xyi, 0, 0, DT_REAL, _state, ae_true);
31527 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
31528 0 : ae_vector_init(&means, 0, DT_REAL, _state, ae_true);
31529 0 : ae_vector_init(&sigmas, 0, DT_REAL, _state, ae_true);
31530 :
31531 :
31532 : /*
31533 : * Test parameters
31534 : */
31535 0 : if( npoints<=nvars+1||nvars<1 )
31536 : {
31537 0 : *info = -1;
31538 0 : ae_frame_leave(_state);
31539 0 : return;
31540 : }
31541 :
31542 : /*
31543 : * Copy data, add one more column (constant term)
31544 : */
31545 0 : ae_matrix_set_length(&xyi, npoints-1+1, nvars+1+1, _state);
31546 0 : for(i=0; i<=npoints-1; i++)
31547 : {
31548 0 : ae_v_move(&xyi.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
31549 0 : xyi.ptr.pp_double[i][nvars] = (double)(1);
31550 0 : xyi.ptr.pp_double[i][nvars+1] = xy->ptr.pp_double[i][nvars];
31551 : }
31552 :
31553 : /*
31554 : * Standardization
31555 : */
31556 0 : ae_vector_set_length(&x, npoints-1+1, _state);
31557 0 : ae_vector_set_length(&means, nvars-1+1, _state);
31558 0 : ae_vector_set_length(&sigmas, nvars-1+1, _state);
31559 0 : for(j=0; j<=nvars-1; j++)
31560 : {
31561 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
31562 0 : samplemoments(&x, npoints, &mean, &variance, &skewness, &kurtosis, _state);
31563 0 : means.ptr.p_double[j] = mean;
31564 0 : sigmas.ptr.p_double[j] = ae_sqrt(variance, _state);
31565 0 : if( ae_fp_eq(sigmas.ptr.p_double[j],(double)(0)) )
31566 : {
31567 0 : sigmas.ptr.p_double[j] = (double)(1);
31568 : }
31569 0 : for(i=0; i<=npoints-1; i++)
31570 : {
31571 0 : xyi.ptr.pp_double[i][j] = (xyi.ptr.pp_double[i][j]-means.ptr.p_double[j])/sigmas.ptr.p_double[j];
31572 : }
31573 : }
31574 :
31575 : /*
31576 : * Internal processing
31577 : */
31578 0 : linreg_lrinternal(&xyi, s, npoints, nvars+1, info, lm, ar, _state);
31579 0 : if( *info<0 )
31580 : {
31581 0 : ae_frame_leave(_state);
31582 0 : return;
31583 : }
31584 :
31585 : /*
31586 : * Un-standardization
31587 : */
31588 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31589 0 : for(j=0; j<=nvars-1; j++)
31590 : {
31591 :
31592 : /*
31593 : * Constant term is updated (and its covariance too,
31594 : * since it gets some variance from J-th component)
31595 : */
31596 0 : lm->w.ptr.p_double[offs+nvars] = lm->w.ptr.p_double[offs+nvars]-lm->w.ptr.p_double[offs+j]*means.ptr.p_double[j]/sigmas.ptr.p_double[j];
31597 0 : v = means.ptr.p_double[j]/sigmas.ptr.p_double[j];
31598 0 : ae_v_subd(&ar->c.ptr.pp_double[nvars][0], 1, &ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
31599 0 : ae_v_subd(&ar->c.ptr.pp_double[0][nvars], ar->c.stride, &ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);
31600 :
31601 : /*
31602 : * J-th term is updated
31603 : */
31604 0 : lm->w.ptr.p_double[offs+j] = lm->w.ptr.p_double[offs+j]/sigmas.ptr.p_double[j];
31605 0 : v = 1/sigmas.ptr.p_double[j];
31606 0 : ae_v_muld(&ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
31607 0 : ae_v_muld(&ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);
31608 : }
31609 0 : ae_frame_leave(_state);
31610 : }
31611 :
31612 :
31613 : /*************************************************************************
31614 : Like LRBuildS, but builds model
31615 :
31616 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
31617 :
31618 : i.e. with zero constant term.
31619 :
31620 : -- ALGLIB --
31621 : Copyright 30.10.2008 by Bochkanov Sergey
31622 : *************************************************************************/
31623 0 : void lrbuildzs(/* Real */ ae_matrix* xy,
31624 : /* Real */ ae_vector* s,
31625 : ae_int_t npoints,
31626 : ae_int_t nvars,
31627 : ae_int_t* info,
31628 : linearmodel* lm,
31629 : lrreport* ar,
31630 : ae_state *_state)
31631 : {
31632 : ae_frame _frame_block;
31633 : ae_matrix xyi;
31634 : ae_vector x;
31635 : ae_vector c;
31636 : ae_int_t i;
31637 : ae_int_t j;
31638 : double v;
31639 : ae_int_t offs;
31640 : double mean;
31641 : double variance;
31642 : double skewness;
31643 : double kurtosis;
31644 :
31645 0 : ae_frame_make(_state, &_frame_block);
31646 0 : memset(&xyi, 0, sizeof(xyi));
31647 0 : memset(&x, 0, sizeof(x));
31648 0 : memset(&c, 0, sizeof(c));
31649 0 : *info = 0;
31650 0 : _linearmodel_clear(lm);
31651 0 : _lrreport_clear(ar);
31652 0 : ae_matrix_init(&xyi, 0, 0, DT_REAL, _state, ae_true);
31653 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
31654 0 : ae_vector_init(&c, 0, DT_REAL, _state, ae_true);
31655 :
31656 :
31657 : /*
31658 : * Test parameters
31659 : */
31660 0 : if( npoints<=nvars+1||nvars<1 )
31661 : {
31662 0 : *info = -1;
31663 0 : ae_frame_leave(_state);
31664 0 : return;
31665 : }
31666 :
31667 : /*
31668 : * Copy data, add one more column (constant term)
31669 : */
31670 0 : ae_matrix_set_length(&xyi, npoints-1+1, nvars+1+1, _state);
31671 0 : for(i=0; i<=npoints-1; i++)
31672 : {
31673 0 : ae_v_move(&xyi.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
31674 0 : xyi.ptr.pp_double[i][nvars] = (double)(0);
31675 0 : xyi.ptr.pp_double[i][nvars+1] = xy->ptr.pp_double[i][nvars];
31676 : }
31677 :
31678 : /*
31679 : * Standardization: unusual scaling
31680 : */
31681 0 : ae_vector_set_length(&x, npoints-1+1, _state);
31682 0 : ae_vector_set_length(&c, nvars-1+1, _state);
31683 0 : for(j=0; j<=nvars-1; j++)
31684 : {
31685 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[0][j], xy->stride, ae_v_len(0,npoints-1));
31686 0 : samplemoments(&x, npoints, &mean, &variance, &skewness, &kurtosis, _state);
31687 0 : if( ae_fp_greater(ae_fabs(mean, _state),ae_sqrt(variance, _state)) )
31688 : {
31689 :
31690 : /*
31691 : * variation is relatively small, so it is better to
31692 : * bring the mean value to 1
31693 : */
31694 0 : c.ptr.p_double[j] = mean;
31695 : }
31696 : else
31697 : {
31698 :
31699 : /*
31700 : * variation is large, so it is better to bring the variance to 1
31701 : */
31702 0 : if( ae_fp_eq(variance,(double)(0)) )
31703 : {
31704 0 : variance = (double)(1);
31705 : }
31706 0 : c.ptr.p_double[j] = ae_sqrt(variance, _state);
31707 : }
31708 0 : for(i=0; i<=npoints-1; i++)
31709 : {
31710 0 : xyi.ptr.pp_double[i][j] = xyi.ptr.pp_double[i][j]/c.ptr.p_double[j];
31711 : }
31712 : }
31713 :
31714 : /*
31715 : * Internal processing
31716 : */
31717 0 : linreg_lrinternal(&xyi, s, npoints, nvars+1, info, lm, ar, _state);
31718 0 : if( *info<0 )
31719 : {
31720 0 : ae_frame_leave(_state);
31721 0 : return;
31722 : }
31723 :
31724 : /*
31725 : * Un-standardization
31726 : */
31727 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31728 0 : for(j=0; j<=nvars-1; j++)
31729 : {
31730 :
31731 : /*
31732 : * J-th term is updated
31733 : */
31734 0 : lm->w.ptr.p_double[offs+j] = lm->w.ptr.p_double[offs+j]/c.ptr.p_double[j];
31735 0 : v = 1/c.ptr.p_double[j];
31736 0 : ae_v_muld(&ar->c.ptr.pp_double[j][0], 1, ae_v_len(0,nvars), v);
31737 0 : ae_v_muld(&ar->c.ptr.pp_double[0][j], ar->c.stride, ae_v_len(0,nvars), v);
31738 : }
31739 0 : ae_frame_leave(_state);
31740 : }
31741 :
31742 :
31743 : /*************************************************************************
31744 : Like LRBuild but builds model
31745 :
31746 : Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
31747 :
31748 : i.e. with zero constant term.
31749 :
31750 : -- ALGLIB --
31751 : Copyright 30.10.2008 by Bochkanov Sergey
31752 : *************************************************************************/
31753 0 : void lrbuildz(/* Real */ ae_matrix* xy,
31754 : ae_int_t npoints,
31755 : ae_int_t nvars,
31756 : ae_int_t* info,
31757 : linearmodel* lm,
31758 : lrreport* ar,
31759 : ae_state *_state)
31760 : {
31761 : ae_frame _frame_block;
31762 : ae_vector s;
31763 : ae_int_t i;
31764 : double sigma2;
31765 :
31766 0 : ae_frame_make(_state, &_frame_block);
31767 0 : memset(&s, 0, sizeof(s));
31768 0 : *info = 0;
31769 0 : _linearmodel_clear(lm);
31770 0 : _lrreport_clear(ar);
31771 0 : ae_vector_init(&s, 0, DT_REAL, _state, ae_true);
31772 :
31773 0 : if( npoints<=nvars+1||nvars<1 )
31774 : {
31775 0 : *info = -1;
31776 0 : ae_frame_leave(_state);
31777 0 : return;
31778 : }
31779 0 : ae_vector_set_length(&s, npoints-1+1, _state);
31780 0 : for(i=0; i<=npoints-1; i++)
31781 : {
31782 0 : s.ptr.p_double[i] = (double)(1);
31783 : }
31784 0 : lrbuildzs(xy, &s, npoints, nvars, info, lm, ar, _state);
31785 0 : if( *info<0 )
31786 : {
31787 0 : ae_frame_leave(_state);
31788 0 : return;
31789 : }
31790 0 : sigma2 = ae_sqr(ar->rmserror, _state)*npoints/(npoints-nvars-1);
31791 0 : for(i=0; i<=nvars; i++)
31792 : {
31793 0 : ae_v_muld(&ar->c.ptr.pp_double[i][0], 1, ae_v_len(0,nvars), sigma2);
31794 : }
31795 0 : ae_frame_leave(_state);
31796 : }
31797 :
31798 :
31799 : /*************************************************************************
31800 : Unpacks coefficients of linear model.
31801 :
31802 : INPUT PARAMETERS:
31803 : LM - linear model in ALGLIB format
31804 :
31805 : OUTPUT PARAMETERS:
31806 : V - coefficients, array[0..NVars]
31807 : the constant term (intercept) is stored in V[NVars].
31808 : NVars - number of independent variables (one less than number
31809 : of coefficients)
31810 :
31811 : -- ALGLIB --
31812 : Copyright 30.08.2008 by Bochkanov Sergey
31813 : *************************************************************************/
31814 0 : void lrunpack(linearmodel* lm,
31815 : /* Real */ ae_vector* v,
31816 : ae_int_t* nvars,
31817 : ae_state *_state)
31818 : {
31819 : ae_int_t offs;
31820 :
31821 0 : ae_vector_clear(v);
31822 0 : *nvars = 0;
31823 :
31824 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
31825 0 : *nvars = ae_round(lm->w.ptr.p_double[2], _state);
31826 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31827 0 : ae_vector_set_length(v, *nvars+1, _state);
31828 0 : ae_v_move(&v->ptr.p_double[0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,*nvars));
31829 0 : }
31830 :
31831 :
31832 : /*************************************************************************
31833 : "Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
31834 : reversed).
31835 :
31836 : INPUT PARAMETERS:
31837 : V - coefficients, array[0..NVars]
31838 : NVars - number of independent variables
31839 :
31840 : OUTPUT PARAMETERS:
31841 : LM - linear model.
31842 :
31843 : -- ALGLIB --
31844 : Copyright 30.08.2008 by Bochkanov Sergey
31845 : *************************************************************************/
31846 0 : void lrpack(/* Real */ ae_vector* v,
31847 : ae_int_t nvars,
31848 : linearmodel* lm,
31849 : ae_state *_state)
31850 : {
31851 : ae_int_t offs;
31852 :
31853 0 : _linearmodel_clear(lm);
31854 :
31855 0 : ae_vector_set_length(&lm->w, 4+nvars+1, _state);
31856 0 : offs = 4;
31857 0 : lm->w.ptr.p_double[0] = (double)(4+nvars+1);
31858 0 : lm->w.ptr.p_double[1] = (double)(linreg_lrvnum);
31859 0 : lm->w.ptr.p_double[2] = (double)(nvars);
31860 0 : lm->w.ptr.p_double[3] = (double)(offs);
31861 0 : ae_v_move(&lm->w.ptr.p_double[offs], 1, &v->ptr.p_double[0], 1, ae_v_len(offs,offs+nvars));
31862 0 : }
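
/*************************************************************************
Usage sketch (hypothetical coefficients, not part of ALGLIB's shipped
examples): LRPack builds a model directly from coefficients, LRProcess
evaluates it and LRUnpack recovers the coefficients; same C-core
conventions as in the LRBuild sketch above:

    #include "dataanalysis.h"

    void lrpack_demo(void)
    {
        ae_state state;
        ae_vector v, x, w;
        linearmodel lm;
        ae_int_t nvars;
        double y;

        ae_state_init(&state);
        ae_vector_init(&v, 3, DT_REAL, &state, ae_false);
        ae_vector_init(&x, 2, DT_REAL, &state, ae_false);
        ae_vector_init(&w, 0, DT_REAL, &state, ae_false);
        _linearmodel_init(&lm, &state, ae_false);
        v.ptr.p_double[0] = 2.0;              /* A(0)                  */
        v.ptr.p_double[1] = -1.0;             /* A(1)                  */
        v.ptr.p_double[2] = 0.5;              /* constant term A(2)    */
        lrpack(&v, 2, &lm, &state);           /* y = 2*x0 - x1 + 0.5   */
        x.ptr.p_double[0] = 1.0;
        x.ptr.p_double[1] = 1.0;
        y = lrprocess(&lm, &x, &state);       /* y == 1.5              */
        lrunpack(&lm, &w, &nvars, &state);    /* w == v, nvars == 2    */
    }
*************************************************************************/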
31863 :
31864 :
31865 : /*************************************************************************
31866 : Processing
31867 :
31868 : INPUT PARAMETERS:
31869 : LM - linear model
31870 : X - input vector, array[0..NVars-1].
31871 :
31872 : Result:
31873 : value of linear model regression estimate
31874 :
31875 : -- ALGLIB --
31876 : Copyright 03.09.2008 by Bochkanov Sergey
31877 : *************************************************************************/
31878 0 : double lrprocess(linearmodel* lm,
31879 : /* Real */ ae_vector* x,
31880 : ae_state *_state)
31881 : {
31882 : double v;
31883 : ae_int_t offs;
31884 : ae_int_t nvars;
31885 : double result;
31886 :
31887 :
31888 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
31889 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
31890 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31891 0 : v = ae_v_dotproduct(&x->ptr.p_double[0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
31892 0 : result = v+lm->w.ptr.p_double[offs+nvars];
31893 0 : return result;
31894 : }
31895 :
31896 :
31897 : /*************************************************************************
31898 : RMS error on the test set
31899 :
31900 : INPUT PARAMETERS:
31901 : LM - linear model
31902 : XY - test set
31903 : NPoints - test set size
31904 :
31905 : RESULT:
31906 : root mean square error.
31907 :
31908 : -- ALGLIB --
31909 : Copyright 30.08.2008 by Bochkanov Sergey
31910 : *************************************************************************/
31911 0 : double lrrmserror(linearmodel* lm,
31912 : /* Real */ ae_matrix* xy,
31913 : ae_int_t npoints,
31914 : ae_state *_state)
31915 : {
31916 : ae_int_t i;
31917 : double v;
31918 : ae_int_t offs;
31919 : ae_int_t nvars;
31920 : double result;
31921 :
31922 :
31923 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
31924 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
31925 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31926 0 : result = (double)(0);
31927 0 : for(i=0; i<=npoints-1; i++)
31928 : {
31929 0 : v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
31930 0 : v = v+lm->w.ptr.p_double[offs+nvars];
31931 0 : result = result+ae_sqr(v-xy->ptr.pp_double[i][nvars], _state);
31932 : }
31933 0 : result = ae_sqrt(result/npoints, _state);
31934 0 : return result;
31935 : }
31936 :
31937 :
31938 : /*************************************************************************
31939 : Average error on the test set
31940 :
31941 : INPUT PARAMETERS:
31942 : LM - linear model
31943 : XY - test set
31944 : NPoints - test set size
31945 :
31946 : RESULT:
31947 : average error.
31948 :
31949 : -- ALGLIB --
31950 : Copyright 30.08.2008 by Bochkanov Sergey
31951 : *************************************************************************/
31952 0 : double lravgerror(linearmodel* lm,
31953 : /* Real */ ae_matrix* xy,
31954 : ae_int_t npoints,
31955 : ae_state *_state)
31956 : {
31957 : ae_int_t i;
31958 : double v;
31959 : ae_int_t offs;
31960 : ae_int_t nvars;
31961 : double result;
31962 :
31963 :
31964 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
31965 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
31966 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
31967 0 : result = (double)(0);
31968 0 : for(i=0; i<=npoints-1; i++)
31969 : {
31970 0 : v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
31971 0 : v = v+lm->w.ptr.p_double[offs+nvars];
31972 0 : result = result+ae_fabs(v-xy->ptr.pp_double[i][nvars], _state);
31973 : }
31974 0 : result = result/npoints;
31975 0 : return result;
31976 : }
31977 :
31978 :
31979 : /*************************************************************************
31980 : Average relative error on the test set
31981 :
31982 : INPUT PARAMETERS:
31983 : LM - linear model
31984 : XY - test set
31985 : NPoints - test set size
31986 :
31987 : RESULT:
31988 : average relative error.
31989 :
31990 : -- ALGLIB --
31991 : Copyright 30.08.2008 by Bochkanov Sergey
31992 : *************************************************************************/
31993 0 : double lravgrelerror(linearmodel* lm,
31994 : /* Real */ ae_matrix* xy,
31995 : ae_int_t npoints,
31996 : ae_state *_state)
31997 : {
31998 : ae_int_t i;
31999 : ae_int_t k;
32000 : double v;
32001 : ae_int_t offs;
32002 : ae_int_t nvars;
32003 : double result;
32004 :
32005 :
32006 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==linreg_lrvnum, "LINREG: Incorrect LINREG version!", _state);
32007 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
32008 0 : offs = ae_round(lm->w.ptr.p_double[3], _state);
32009 0 : result = (double)(0);
32010 0 : k = 0;
32011 0 : for(i=0; i<=npoints-1; i++)
32012 : {
32013 0 : if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
32014 : {
32015 0 : v = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
32016 0 : v = v+lm->w.ptr.p_double[offs+nvars];
32017 0 : result = result+ae_fabs((v-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
32018 0 : k = k+1;
32019 : }
32020 : }
32021 0 : if( k!=0 )
32022 : {
32023 0 : result = result/k;
32024 : }
32025 0 : return result;
32026 : }
32027 :
32028 :
32029 : /*************************************************************************
32030 : Copying of the LinearModel structure
32031 :
32032 : INPUT PARAMETERS:
32033 : LM1 - original
32034 :
32035 : OUTPUT PARAMETERS:
32036 : LM2 - copy
32037 :
32038 : -- ALGLIB --
32039 : Copyright 15.03.2009 by Bochkanov Sergey
32040 : *************************************************************************/
32041 0 : void lrcopy(linearmodel* lm1, linearmodel* lm2, ae_state *_state)
32042 : {
32043 : ae_int_t k;
32044 :
32045 0 : _linearmodel_clear(lm2);
32046 :
32047 0 : k = ae_round(lm1->w.ptr.p_double[0], _state);
32048 0 : ae_vector_set_length(&lm2->w, k-1+1, _state);
32049 0 : ae_v_move(&lm2->w.ptr.p_double[0], 1, &lm1->w.ptr.p_double[0], 1, ae_v_len(0,k-1));
32050 0 : }
32051 :
32052 :
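/*************************************************************************
Weighted fitting of a straight line  Y = A + B*X.

INPUT PARAMETERS:
    XY      -   points, array[0..N-1,0..1]: X in column 0, Y in column 1
    S       -   standard deviations of Y values, array[0..N-1], S[i]>0
    N       -   number of points, N>=2

OUTPUT PARAMETERS:
    Info    -   return code:
                * -1, if N<2
                * -2, if S[I]<=0
                * -3, if the task is degenerate (e.g. all X[i] nearly equal)
                *  1, if subroutine successfully finished
    A, B    -   coefficients of the model Y = A + B*X
    VarA    -   variance of A
    VarB    -   variance of B
    CovAB   -   covariance of A and B
    CorrAB  -   correlation of A and B
    P       -   chi-square goodness-of-fit probability; equals 1 for N=2
*************************************************************************/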
32053 0 : void lrlines(/* Real */ ae_matrix* xy,
32054 : /* Real */ ae_vector* s,
32055 : ae_int_t n,
32056 : ae_int_t* info,
32057 : double* a,
32058 : double* b,
32059 : double* vara,
32060 : double* varb,
32061 : double* covab,
32062 : double* corrab,
32063 : double* p,
32064 : ae_state *_state)
32065 : {
32066 : ae_int_t i;
32067 : double ss;
32068 : double sx;
32069 : double sxx;
32070 : double sy;
32071 : double stt;
32072 : double e1;
32073 : double e2;
32074 : double t;
32075 : double chi2;
32076 :
32077 0 : *info = 0;
32078 0 : *a = 0;
32079 0 : *b = 0;
32080 0 : *vara = 0;
32081 0 : *varb = 0;
32082 0 : *covab = 0;
32083 0 : *corrab = 0;
32084 0 : *p = 0;
32085 :
32086 0 : if( n<2 )
32087 : {
32088 0 : *info = -1;
32089 0 : return;
32090 : }
32091 0 : for(i=0; i<=n-1; i++)
32092 : {
32093 0 : if( ae_fp_less_eq(s->ptr.p_double[i],(double)(0)) )
32094 : {
32095 0 : *info = -2;
32096 0 : return;
32097 : }
32098 : }
32099 0 : *info = 1;
32100 :
32101 : /*
32102 : * Calculate S, SX, SY, SXX
32103 : */
32104 0 : ss = (double)(0);
32105 0 : sx = (double)(0);
32106 0 : sy = (double)(0);
32107 0 : sxx = (double)(0);
32108 0 : for(i=0; i<=n-1; i++)
32109 : {
32110 0 : t = ae_sqr(s->ptr.p_double[i], _state);
32111 0 : ss = ss+1/t;
32112 0 : sx = sx+xy->ptr.pp_double[i][0]/t;
32113 0 : sy = sy+xy->ptr.pp_double[i][1]/t;
32114 0 : sxx = sxx+ae_sqr(xy->ptr.pp_double[i][0], _state)/t;
32115 : }
32116 :
32117 : /*
32118 : * Test for condition number
32119 : */
32120 0 : t = ae_sqrt(4*ae_sqr(sx, _state)+ae_sqr(ss-sxx, _state), _state);
32121 0 : e1 = 0.5*(ss+sxx+t);
32122 0 : e2 = 0.5*(ss+sxx-t);
32123 0 : if( ae_fp_less_eq(ae_minreal(e1, e2, _state),1000*ae_machineepsilon*ae_maxreal(e1, e2, _state)) )
32124 : {
32125 0 : *info = -3;
32126 0 : return;
32127 : }
32128 :
32129 : /*
32130 : * Calculate A, B
32131 : */
32132 0 : *a = (double)(0);
32133 0 : *b = (double)(0);
32134 0 : stt = (double)(0);
32135 0 : for(i=0; i<=n-1; i++)
32136 : {
32137 0 : t = (xy->ptr.pp_double[i][0]-sx/ss)/s->ptr.p_double[i];
32138 0 : *b = *b+t*xy->ptr.pp_double[i][1]/s->ptr.p_double[i];
32139 0 : stt = stt+ae_sqr(t, _state);
32140 : }
32141 0 : *b = *b/stt;
32142 0 : *a = (sy-sx*(*b))/ss;
32143 :
32144 : /*
32145 : * Calculate goodness-of-fit
32146 : */
32147 0 : if( n>2 )
32148 : {
32149 0 : chi2 = (double)(0);
32150 0 : for(i=0; i<=n-1; i++)
32151 : {
32152 0 : chi2 = chi2+ae_sqr((xy->ptr.pp_double[i][1]-(*a)-*b*xy->ptr.pp_double[i][0])/s->ptr.p_double[i], _state);
32153 : }
32154 0 : *p = incompletegammac((double)(n-2)/(double)2, chi2/2, _state);
32155 : }
32156 : else
32157 : {
32158 0 : *p = (double)(1);
32159 : }
32160 :
32161 : /*
32162 : * Calculate other parameters
32163 : */
32164 0 : *vara = (1+ae_sqr(sx, _state)/(ss*stt))/ss;
32165 0 : *varb = 1/stt;
32166 0 : *covab = -sx/(ss*stt);
32167 0 : *corrab = *covab/ae_sqrt(*vara*(*varb), _state);
32168 : }
32169 :
32170 :
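/*************************************************************************
Fitting of a straight line  Y = A + B*X  with unit weights; convenience
wrapper around LRLineS which discards the variance/covariance estimates.

INPUT PARAMETERS:
    XY      -   points, array[0..N-1,0..1]: X in column 0, Y in column 1
    N       -   number of points, N>=2

OUTPUT PARAMETERS:
    Info    -   return code, see LRLineS
    A, B    -   coefficients of the model Y = A + B*X
*************************************************************************/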
32171 0 : void lrline(/* Real */ ae_matrix* xy,
32172 : ae_int_t n,
32173 : ae_int_t* info,
32174 : double* a,
32175 : double* b,
32176 : ae_state *_state)
32177 : {
32178 : ae_frame _frame_block;
32179 : ae_vector s;
32180 : ae_int_t i;
32181 : double vara;
32182 : double varb;
32183 : double covab;
32184 : double corrab;
32185 : double p;
32186 :
32187 0 : ae_frame_make(_state, &_frame_block);
32188 0 : memset(&s, 0, sizeof(s));
32189 0 : *info = 0;
32190 0 : *a = 0;
32191 0 : *b = 0;
32192 0 : ae_vector_init(&s, 0, DT_REAL, _state, ae_true);
32193 :
32194 0 : if( n<2 )
32195 : {
32196 0 : *info = -1;
32197 0 : ae_frame_leave(_state);
32198 0 : return;
32199 : }
32200 0 : ae_vector_set_length(&s, n-1+1, _state);
32201 0 : for(i=0; i<=n-1; i++)
32202 : {
32203 0 : s.ptr.p_double[i] = (double)(1);
32204 : }
32205 0 : lrlines(xy, &s, n, info, a, b, &vara, &varb, &covab, &corrab, &p, _state);
32206 0 : ae_frame_leave(_state);
32207 : }
32208 :
32209 :
32210 : /*************************************************************************
32211 : Internal linear regression subroutine
32212 : *************************************************************************/
32213 0 : static void linreg_lrinternal(/* Real */ ae_matrix* xy,
32214 : /* Real */ ae_vector* s,
32215 : ae_int_t npoints,
32216 : ae_int_t nvars,
32217 : ae_int_t* info,
32218 : linearmodel* lm,
32219 : lrreport* ar,
32220 : ae_state *_state)
32221 : {
32222 : ae_frame _frame_block;
32223 : ae_matrix a;
32224 : ae_matrix u;
32225 : ae_matrix vt;
32226 : ae_matrix vm;
32227 : ae_matrix xym;
32228 : ae_vector b;
32229 : ae_vector sv;
32230 : ae_vector t;
32231 : ae_vector svi;
32232 : ae_vector work;
32233 : ae_int_t i;
32234 : ae_int_t j;
32235 : ae_int_t k;
32236 : ae_int_t ncv;
32237 : ae_int_t na;
32238 : ae_int_t nacv;
32239 : double r;
32240 : double p;
32241 : double epstol;
32242 : lrreport ar2;
32243 : ae_int_t offs;
32244 : linearmodel tlm;
32245 :
32246 0 : ae_frame_make(_state, &_frame_block);
32247 0 : memset(&a, 0, sizeof(a));
32248 0 : memset(&u, 0, sizeof(u));
32249 0 : memset(&vt, 0, sizeof(vt));
32250 0 : memset(&vm, 0, sizeof(vm));
32251 0 : memset(&xym, 0, sizeof(xym));
32252 0 : memset(&b, 0, sizeof(b));
32253 0 : memset(&sv, 0, sizeof(sv));
32254 0 : memset(&t, 0, sizeof(t));
32255 0 : memset(&svi, 0, sizeof(svi));
32256 0 : memset(&work, 0, sizeof(work));
32257 0 : memset(&ar2, 0, sizeof(ar2));
32258 0 : memset(&tlm, 0, sizeof(tlm));
32259 0 : *info = 0;
32260 0 : _linearmodel_clear(lm);
32261 0 : _lrreport_clear(ar);
32262 0 : ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
32263 0 : ae_matrix_init(&u, 0, 0, DT_REAL, _state, ae_true);
32264 0 : ae_matrix_init(&vt, 0, 0, DT_REAL, _state, ae_true);
32265 0 : ae_matrix_init(&vm, 0, 0, DT_REAL, _state, ae_true);
32266 0 : ae_matrix_init(&xym, 0, 0, DT_REAL, _state, ae_true);
32267 0 : ae_vector_init(&b, 0, DT_REAL, _state, ae_true);
32268 0 : ae_vector_init(&sv, 0, DT_REAL, _state, ae_true);
32269 0 : ae_vector_init(&t, 0, DT_REAL, _state, ae_true);
32270 0 : ae_vector_init(&svi, 0, DT_REAL, _state, ae_true);
32271 0 : ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
32272 0 : _lrreport_init(&ar2, _state, ae_true);
32273 0 : _linearmodel_init(&tlm, _state, ae_true);
32274 :
32275 0 : epstol = (double)(1000);
32276 :
32277 : /*
32278 : * Check for errors in data
32279 : */
32280 0 : if( npoints<nvars||nvars<1 )
32281 : {
32282 0 : *info = -1;
32283 0 : ae_frame_leave(_state);
32284 0 : return;
32285 : }
32286 0 : for(i=0; i<=npoints-1; i++)
32287 : {
32288 0 : if( ae_fp_less_eq(s->ptr.p_double[i],(double)(0)) )
32289 : {
32290 0 : *info = -2;
32291 0 : ae_frame_leave(_state);
32292 0 : return;
32293 : }
32294 : }
32295 0 : *info = 1;
32296 :
32297 : /*
32298 : * Create design matrix
32299 : */
32300 0 : ae_matrix_set_length(&a, npoints-1+1, nvars-1+1, _state);
32301 0 : ae_vector_set_length(&b, npoints-1+1, _state);
32302 0 : for(i=0; i<=npoints-1; i++)
32303 : {
32304 0 : r = 1/s->ptr.p_double[i];
32305 0 : ae_v_moved(&a.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), r);
32306 0 : b.ptr.p_double[i] = xy->ptr.pp_double[i][nvars]/s->ptr.p_double[i];
32307 : }
32308 :
32309 : /*
32310 : * Allocate W:
32311 : * W[0] array size
32312 : * W[1] version number, 0
32313 : * W[2] NVars (minus 1, to be compatible with external representation)
32314 : * W[3] coefficients offset
32315 : */
32316 0 : ae_vector_set_length(&lm->w, 4+nvars-1+1, _state);
32317 0 : offs = 4;
32318 0 : lm->w.ptr.p_double[0] = (double)(4+nvars);
32319 0 : lm->w.ptr.p_double[1] = (double)(linreg_lrvnum);
32320 0 : lm->w.ptr.p_double[2] = (double)(nvars-1);
32321 0 : lm->w.ptr.p_double[3] = (double)(offs);
32322 :
32323 : /*
32324 : * Solve problem using SVD:
32325 : *
32326 : * 0. check for degeneracy (different types)
32327 : * 1. A = U*diag(sv)*V'
32328 : * 2. T = b'*U
32329 : * 3. w = SUM((T[i]/sv[i])*V[..,i])
32330 : * 4. cov(w[i],w[j]) = SUM(V[i,k]*V[j,k]/sv[k]^2, k=1..NVars)
32331 : *
32332 : * see §15.4 of "Numerical Recipes in C" for more information
32333 : */
32334 0 : ae_vector_set_length(&t, nvars-1+1, _state);
32335 0 : ae_vector_set_length(&svi, nvars-1+1, _state);
32336 0 : ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
32337 0 : ae_matrix_set_length(&vm, nvars-1+1, nvars-1+1, _state);
32338 0 : if( !rmatrixsvd(&a, npoints, nvars, 1, 1, 2, &sv, &u, &vt, _state) )
32339 : {
32340 0 : *info = -4;
32341 0 : ae_frame_leave(_state);
32342 0 : return;
32343 : }
32344 0 : if( ae_fp_less_eq(sv.ptr.p_double[0],(double)(0)) )
32345 : {
32346 :
32347 : /*
32348 : * Degenerate case: zero design matrix.
32349 : */
32350 0 : for(i=offs; i<=offs+nvars-1; i++)
32351 : {
32352 0 : lm->w.ptr.p_double[i] = (double)(0);
32353 : }
32354 0 : ar->rmserror = lrrmserror(lm, xy, npoints, _state);
32355 0 : ar->avgerror = lravgerror(lm, xy, npoints, _state);
32356 0 : ar->avgrelerror = lravgrelerror(lm, xy, npoints, _state);
32357 0 : ar->cvrmserror = ar->rmserror;
32358 0 : ar->cvavgerror = ar->avgerror;
32359 0 : ar->cvavgrelerror = ar->avgrelerror;
32360 0 : ar->ncvdefects = 0;
32361 0 : ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
32362 0 : for(i=0; i<=nvars-1; i++)
32363 : {
32364 0 : ar->cvdefects.ptr.p_int[i] = -1;
32365 : }
32366 0 : ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
32367 0 : for(i=0; i<=nvars-1; i++)
32368 : {
32369 0 : for(j=0; j<=nvars-1; j++)
32370 : {
32371 0 : ar->c.ptr.pp_double[i][j] = (double)(0);
32372 : }
32373 : }
32374 0 : ae_frame_leave(_state);
32375 0 : return;
32376 : }
32377 0 : if( ae_fp_less_eq(sv.ptr.p_double[nvars-1],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
32378 : {
32379 :
32380 : /*
32381 : * Degenerate case, non-zero design matrix.
32382 : *
32383 : * We could keep it and solve the task in SVD least-squares fashion:
32384 : * the solution and covariance matrix would be obtained correctly,
32385 : * but the CV error estimates would not. It is better to reduce the
32386 : * problem to a non-degenerate task and obtain correct CV estimates.
32387 : */
32388 0 : for(k=nvars; k>=1; k--)
32389 : {
32390 0 : if( ae_fp_greater(sv.ptr.p_double[k-1],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
32391 : {
32392 :
32393 : /*
32394 : * Reduce
32395 : */
32396 0 : ae_matrix_set_length(&xym, npoints-1+1, k+1, _state);
32397 0 : for(i=0; i<=npoints-1; i++)
32398 : {
32399 0 : for(j=0; j<=k-1; j++)
32400 : {
32401 0 : r = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &vt.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
32402 0 : xym.ptr.pp_double[i][j] = r;
32403 : }
32404 0 : xym.ptr.pp_double[i][k] = xy->ptr.pp_double[i][nvars];
32405 : }
32406 :
32407 : /*
32408 : * Solve
32409 : */
32410 0 : linreg_lrinternal(&xym, s, npoints, k, info, &tlm, &ar2, _state);
32411 0 : if( *info!=1 )
32412 : {
32413 0 : ae_frame_leave(_state);
32414 0 : return;
32415 : }
32416 :
32417 : /*
32418 : * Convert back to un-reduced format
32419 : */
32420 0 : for(j=0; j<=nvars-1; j++)
32421 : {
32422 0 : lm->w.ptr.p_double[offs+j] = (double)(0);
32423 : }
32424 0 : for(j=0; j<=k-1; j++)
32425 : {
32426 0 : r = tlm.w.ptr.p_double[offs+j];
32427 0 : ae_v_addd(&lm->w.ptr.p_double[offs], 1, &vt.ptr.pp_double[j][0], 1, ae_v_len(offs,offs+nvars-1), r);
32428 : }
32429 0 : ar->rmserror = ar2.rmserror;
32430 0 : ar->avgerror = ar2.avgerror;
32431 0 : ar->avgrelerror = ar2.avgrelerror;
32432 0 : ar->cvrmserror = ar2.cvrmserror;
32433 0 : ar->cvavgerror = ar2.cvavgerror;
32434 0 : ar->cvavgrelerror = ar2.cvavgrelerror;
32435 0 : ar->ncvdefects = ar2.ncvdefects;
32436 0 : ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
32437 0 : for(j=0; j<=ar->ncvdefects-1; j++)
32438 : {
32439 0 : ar->cvdefects.ptr.p_int[j] = ar2.cvdefects.ptr.p_int[j];
32440 : }
32441 0 : for(j=ar->ncvdefects; j<=nvars-1; j++)
32442 : {
32443 0 : ar->cvdefects.ptr.p_int[j] = -1;
32444 : }
32445 0 : ae_matrix_set_length(&ar->c, nvars-1+1, nvars-1+1, _state);
32446 0 : ae_vector_set_length(&work, nvars+1, _state);
32447 0 : matrixmatrixmultiply(&ar2.c, 0, k-1, 0, k-1, ae_false, &vt, 0, k-1, 0, nvars-1, ae_false, 1.0, &vm, 0, k-1, 0, nvars-1, 0.0, &work, _state);
32448 0 : matrixmatrixmultiply(&vt, 0, k-1, 0, nvars-1, ae_true, &vm, 0, k-1, 0, nvars-1, ae_false, 1.0, &ar->c, 0, nvars-1, 0, nvars-1, 0.0, &work, _state);
32449 0 : ae_frame_leave(_state);
32450 0 : return;
32451 : }
32452 : }
32453 0 : *info = -255;
32454 0 : ae_frame_leave(_state);
32455 0 : return;
32456 : }
32457 0 : for(i=0; i<=nvars-1; i++)
32458 : {
32459 0 : if( ae_fp_greater(sv.ptr.p_double[i],epstol*ae_machineepsilon*sv.ptr.p_double[0]) )
32460 : {
32461 0 : svi.ptr.p_double[i] = 1/sv.ptr.p_double[i];
32462 : }
32463 : else
32464 : {
32465 0 : svi.ptr.p_double[i] = (double)(0);
32466 : }
32467 : }
32468 0 : for(i=0; i<=nvars-1; i++)
32469 : {
32470 0 : t.ptr.p_double[i] = (double)(0);
32471 : }
32472 0 : for(i=0; i<=npoints-1; i++)
32473 : {
32474 0 : r = b.ptr.p_double[i];
32475 0 : ae_v_addd(&t.ptr.p_double[0], 1, &u.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1), r);
32476 : }
32477 0 : for(i=0; i<=nvars-1; i++)
32478 : {
32479 0 : lm->w.ptr.p_double[offs+i] = (double)(0);
32480 : }
32481 0 : for(i=0; i<=nvars-1; i++)
32482 : {
32483 0 : r = t.ptr.p_double[i]*svi.ptr.p_double[i];
32484 0 : ae_v_addd(&lm->w.ptr.p_double[offs], 1, &vt.ptr.pp_double[i][0], 1, ae_v_len(offs,offs+nvars-1), r);
32485 : }
32486 0 : for(j=0; j<=nvars-1; j++)
32487 : {
32488 0 : r = svi.ptr.p_double[j];
32489 0 : ae_v_moved(&vm.ptr.pp_double[0][j], vm.stride, &vt.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1), r);
32490 : }
32491 0 : for(i=0; i<=nvars-1; i++)
32492 : {
32493 0 : for(j=i; j<=nvars-1; j++)
32494 : {
32495 0 : r = ae_v_dotproduct(&vm.ptr.pp_double[i][0], 1, &vm.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
32496 0 : ar->c.ptr.pp_double[i][j] = r;
32497 0 : ar->c.ptr.pp_double[j][i] = r;
32498 : }
32499 : }
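    /*
     * Editorial note (not part of ALGLIB): in the block above VM = V*diag(1/s_j),
     * with 1/s_j replaced by zero for near-degenerate singular values, so the
     * symmetric product computed here is C = V*S^(-2)*V' = pinv(A'*A), i.e. the
     * covariance matrix of the weight estimates up to the noise scale.
     */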
32500 :
32501 : /*
32502 : * Leave-1-out cross-validation error.
32503 : *
32504 : * NOTATIONS:
32505 : * A design matrix
32506 : * A*x = b original linear least squares task
32507 : * U*S*V' SVD of A
32508 : * ai i-th row of the A
32509 : * bi i-th element of the b
32510 : * xf solution of the original LLS task
32511 : *
32512 : * Cross-validation error of i-th element from a sample is
32513 : * calculated using following formula:
32514 : *
32515 : * ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui') (1)
32516 : *
32517 : * This formula can be derived from normal equations of the
32518 : * original task
32519 : *
32520 : * (A'*A)x = A'*b (2)
32521 : *
32522 : * by applying modification (zeroing out i-th row of A) to (2):
32523 : *
32524 : * (A-ai)'*(A-ai) = (A-ai)'*b
32525 : *
32526 : * and using Sherman-Morrison formula for updating matrix inverse
32527 : *
32528 : NOTE 1: b is not zeroed out; leaving it intact is much simpler and
32529 : does not influence the final result.
32530 : *
32531 : * NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
32532 : Formula (1) can't be applied to such cases, so they are excluded
32533 : from the CV calculation (which distorts the resulting CV estimate).
32534 : But from the properties of U we can conclude that there can
32535 : be no more than NVars such vectors. Usually
32536 : NVars << NPoints, so in a normal case this only slightly
32537 : influences the result.
32538 : */
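    /*
     * Editorial worked example with made-up numbers: let ai*xf = 2.0,
     * bi = 1.0 and the leverage p = ui*ui' = 0.5. The leave-one-out
     * prediction (the second term of formula (1), computed as R in the
     * loop below) is (2.0 - 1.0*0.5)/(1-0.5) = 3.0, so the LOO residual
     * is 3.0 - 1.0 = 2.0 = (ai*xf - bi)/(1-p): twice the in-sample
     * residual, as expected for such a high-leverage point.
     */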
32539 0 : ncv = 0;
32540 0 : na = 0;
32541 0 : nacv = 0;
32542 0 : ar->rmserror = (double)(0);
32543 0 : ar->avgerror = (double)(0);
32544 0 : ar->avgrelerror = (double)(0);
32545 0 : ar->cvrmserror = (double)(0);
32546 0 : ar->cvavgerror = (double)(0);
32547 0 : ar->cvavgrelerror = (double)(0);
32548 0 : ar->ncvdefects = 0;
32549 0 : ae_vector_set_length(&ar->cvdefects, nvars-1+1, _state);
32550 0 : for(i=0; i<=nvars-1; i++)
32551 : {
32552 0 : ar->cvdefects.ptr.p_int[i] = -1;
32553 : }
32554 0 : for(i=0; i<=npoints-1; i++)
32555 : {
32556 :
32557 : /*
32558 : * Error on a training set
32559 : */
32560 0 : r = ae_v_dotproduct(&xy->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs], 1, ae_v_len(0,nvars-1));
32561 0 : ar->rmserror = ar->rmserror+ae_sqr(r-xy->ptr.pp_double[i][nvars], _state);
32562 0 : ar->avgerror = ar->avgerror+ae_fabs(r-xy->ptr.pp_double[i][nvars], _state);
32563 0 : if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
32564 : {
32565 0 : ar->avgrelerror = ar->avgrelerror+ae_fabs((r-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
32566 0 : na = na+1;
32567 : }
32568 :
32569 : /*
32570 : * Error using fast leave-one-out cross-validation
32571 : */
32572 0 : p = ae_v_dotproduct(&u.ptr.pp_double[i][0], 1, &u.ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
32573 0 : if( ae_fp_greater(p,1-epstol*ae_machineepsilon) )
32574 : {
32575 0 : ar->cvdefects.ptr.p_int[ar->ncvdefects] = i;
32576 0 : ar->ncvdefects = ar->ncvdefects+1;
32577 0 : continue;
32578 : }
32579 0 : r = s->ptr.p_double[i]*(r/s->ptr.p_double[i]-b.ptr.p_double[i]*p)/(1-p);
32580 0 : ar->cvrmserror = ar->cvrmserror+ae_sqr(r-xy->ptr.pp_double[i][nvars], _state);
32581 0 : ar->cvavgerror = ar->cvavgerror+ae_fabs(r-xy->ptr.pp_double[i][nvars], _state);
32582 0 : if( ae_fp_neq(xy->ptr.pp_double[i][nvars],(double)(0)) )
32583 : {
32584 0 : ar->cvavgrelerror = ar->cvavgrelerror+ae_fabs((r-xy->ptr.pp_double[i][nvars])/xy->ptr.pp_double[i][nvars], _state);
32585 0 : nacv = nacv+1;
32586 : }
32587 0 : ncv = ncv+1;
32588 : }
32589 0 : if( ncv==0 )
32590 : {
32591 :
32592 : /*
32593 : * Something strange: ALL ui are degenerate.
32594 : * Unexpected...
32595 : */
32596 0 : *info = -255;
32597 0 : ae_frame_leave(_state);
32598 0 : return;
32599 : }
32600 0 : ar->rmserror = ae_sqrt(ar->rmserror/npoints, _state);
32601 0 : ar->avgerror = ar->avgerror/npoints;
32602 0 : if( na!=0 )
32603 : {
32604 0 : ar->avgrelerror = ar->avgrelerror/na;
32605 : }
32606 0 : ar->cvrmserror = ae_sqrt(ar->cvrmserror/ncv, _state);
32607 0 : ar->cvavgerror = ar->cvavgerror/ncv;
32608 0 : if( nacv!=0 )
32609 : {
32610 0 : ar->cvavgrelerror = ar->cvavgrelerror/nacv;
32611 : }
32612 0 : ae_frame_leave(_state);
32613 : }
32614 :
32615 :
32616 0 : void _linearmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
32617 : {
32618 0 : linearmodel *p = (linearmodel*)_p;
32619 0 : ae_touch_ptr((void*)p);
32620 0 : ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic);
32621 0 : }
32622 :
32623 :
32624 0 : void _linearmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
32625 : {
32626 0 : linearmodel *dst = (linearmodel*)_dst;
32627 0 : linearmodel *src = (linearmodel*)_src;
32628 0 : ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic);
32629 0 : }
32630 :
32631 :
32632 0 : void _linearmodel_clear(void* _p)
32633 : {
32634 0 : linearmodel *p = (linearmodel*)_p;
32635 0 : ae_touch_ptr((void*)p);
32636 0 : ae_vector_clear(&p->w);
32637 0 : }
32638 :
32639 :
32640 0 : void _linearmodel_destroy(void* _p)
32641 : {
32642 0 : linearmodel *p = (linearmodel*)_p;
32643 0 : ae_touch_ptr((void*)p);
32644 0 : ae_vector_destroy(&p->w);
32645 0 : }
32646 :
32647 :
32648 0 : void _lrreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
32649 : {
32650 0 : lrreport *p = (lrreport*)_p;
32651 0 : ae_touch_ptr((void*)p);
32652 0 : ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
32653 0 : ae_vector_init(&p->cvdefects, 0, DT_INT, _state, make_automatic);
32654 0 : }
32655 :
32656 :
32657 0 : void _lrreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
32658 : {
32659 0 : lrreport *dst = (lrreport*)_dst;
32660 0 : lrreport *src = (lrreport*)_src;
32661 0 : ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
32662 0 : dst->rmserror = src->rmserror;
32663 0 : dst->avgerror = src->avgerror;
32664 0 : dst->avgrelerror = src->avgrelerror;
32665 0 : dst->cvrmserror = src->cvrmserror;
32666 0 : dst->cvavgerror = src->cvavgerror;
32667 0 : dst->cvavgrelerror = src->cvavgrelerror;
32668 0 : dst->ncvdefects = src->ncvdefects;
32669 0 : ae_vector_init_copy(&dst->cvdefects, &src->cvdefects, _state, make_automatic);
32670 0 : }
32671 :
32672 :
32673 0 : void _lrreport_clear(void* _p)
32674 : {
32675 0 : lrreport *p = (lrreport*)_p;
32676 0 : ae_touch_ptr((void*)p);
32677 0 : ae_matrix_clear(&p->c);
32678 0 : ae_vector_clear(&p->cvdefects);
32679 0 : }
32680 :
32681 :
32682 0 : void _lrreport_destroy(void* _p)
32683 : {
32684 0 : lrreport *p = (lrreport*)_p;
32685 0 : ae_touch_ptr((void*)p);
32686 0 : ae_matrix_destroy(&p->c);
32687 0 : ae_vector_destroy(&p->cvdefects);
32688 0 : }
32689 :
32690 :
32691 : #endif
32692 : #if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
32693 :
32694 :
32695 : /*************************************************************************
32696 : Filters: simple moving averages (unsymmetric).
32697 :
32698 : This filter replaces the array by the results of an SMA(K) filter. SMA(K)
32699 : is defined as a filter which averages at most K previous points (previous,
32700 : not points AROUND the central point), or fewer for the first K-1 points.
32701 :
32702 : INPUT PARAMETERS:
32703 : X - array[N], array to process. It can be larger than N,
32704 : in this case only first N points are processed.
32705 : N - points count, N>=0
32706 : K - K>=1 (K can be larger than N , such cases will be
32707 : correctly handled). Window width. K=1 corresponds to
32708 : identity transformation (nothing changes).
32709 :
32710 : OUTPUT PARAMETERS:
32711 : X - array, whose first N elements were processed with SMA(K)
32712 :
32713 : NOTE 1: this function uses efficient in-place algorithm which does not
32714 : allocate temporary arrays.
32715 :
32716 : NOTE 2: this algorithm makes only one pass through array and uses running
32717 : sum to speed-up calculation of the averages. Additional measures
32718 : are taken to ensure that running sum on a long sequence of zero
32719 : elements will be correctly reset to zero even in the presence of
32720 : round-off error.
32721 :
32722 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
32723 : average points after the current one. Only X[i], X[i-1], ... are
32724 : used when calculating the new value of X[i]. Note also that the
32725 : algorithm uses BOTH previous points and the current one, i.e. the
32726 : new value of X[i] depends on BOTH the previous points and X[i] itself.
32727 :
32728 : -- ALGLIB --
32729 : Copyright 25.10.2011 by Bochkanov Sergey
32730 : *************************************************************************/
32731 0 : void filtersma(/* Real */ ae_vector* x,
32732 : ae_int_t n,
32733 : ae_int_t k,
32734 : ae_state *_state)
32735 : {
32736 : ae_int_t i;
32737 : double runningsum;
32738 : double termsinsum;
32739 : ae_int_t zeroprefix;
32740 : double v;
32741 :
32742 :
32743 0 : ae_assert(n>=0, "FilterSMA: N<0", _state);
32744 0 : ae_assert(x->cnt>=n, "FilterSMA: Length(X)<N", _state);
32745 0 : ae_assert(isfinitevector(x, n, _state), "FilterSMA: X contains INF or NAN", _state);
32746 0 : ae_assert(k>=1, "FilterSMA: K<1", _state);
32747 :
32748 : /*
32749 : * Quick exit, if necessary
32750 : */
32751 0 : if( n<=1||k==1 )
32752 : {
32753 0 : return;
32754 : }
32755 :
32756 : /*
32757 : * Prepare variables (see below for explanation)
32758 : */
32759 0 : runningsum = 0.0;
32760 0 : termsinsum = (double)(0);
32761 0 : for(i=ae_maxint(n-k, 0, _state); i<=n-1; i++)
32762 : {
32763 0 : runningsum = runningsum+x->ptr.p_double[i];
32764 0 : termsinsum = termsinsum+1;
32765 : }
32766 0 : i = ae_maxint(n-k, 0, _state);
32767 0 : zeroprefix = 0;
32768 0 : while(i<=n-1&&ae_fp_eq(x->ptr.p_double[i],(double)(0)))
32769 : {
32770 0 : zeroprefix = zeroprefix+1;
32771 0 : i = i+1;
32772 : }
32773 :
32774 : /*
32775 : * General case: we assume that N>1 and K>1
32776 : *
32777 : * Make one pass through all elements. At the beginning of
32778 : * the iteration we have:
32779 : * * I element being processed
32780 : * * RunningSum current value of the running sum
32781 : * (including I-th element)
32782 : * * TermsInSum number of terms in sum, 0<=TermsInSum<=K
32783 : * * ZeroPrefix length of the sequence of zero elements
32784 : * which starts at X[I-K+1] and continues towards X[I].
32785 : * Equal to zero in case X[I-K+1] is non-zero.
32786 : * This value is used to make RunningSum exactly zero
32787 : * when it follows from the problem properties.
32788 : */
32789 0 : for(i=n-1; i>=0; i--)
32790 : {
32791 :
32792 : /*
32793 : * Store new value of X[i], save old value in V
32794 : */
32795 0 : v = x->ptr.p_double[i];
32796 0 : x->ptr.p_double[i] = runningsum/termsinsum;
32797 :
32798 : /*
32799 : * Update RunningSum and TermsInSum
32800 : */
32801 0 : if( i-k>=0 )
32802 : {
32803 0 : runningsum = runningsum-v+x->ptr.p_double[i-k];
32804 : }
32805 : else
32806 : {
32807 0 : runningsum = runningsum-v;
32808 0 : termsinsum = termsinsum-1;
32809 : }
32810 :
32811 : /*
32812 : * Update ZeroPrefix.
32813 : * In case we have ZeroPrefix=TermsInSum,
32814 : * RunningSum is reset to zero.
32815 : */
32816 0 : if( i-k>=0 )
32817 : {
32818 0 : if( ae_fp_neq(x->ptr.p_double[i-k],(double)(0)) )
32819 : {
32820 0 : zeroprefix = 0;
32821 : }
32822 : else
32823 : {
32824 0 : zeroprefix = ae_minint(zeroprefix+1, k, _state);
32825 : }
32826 : }
32827 : else
32828 : {
32829 0 : zeroprefix = ae_minint(zeroprefix, i+1, _state);
32830 : }
32831 0 : if( ae_fp_eq((double)(zeroprefix),termsinsum) )
32832 : {
32833 0 : runningsum = (double)(0);
32834 : }
32835 : }
32836 : }
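/*
 * Editorial sketch (not part of ALGLIB): a minimal standalone version of
 * the same SMA(K) recurrence, without the zero-prefix bookkeeping used
 * above for exact cancellation. It seeds the running sum over the window
 * that ends at the last element and then slides the window right-to-left,
 * mirroring the loop structure of filtersma().
 */
static void sma_sketch(double *x, int n, int k)
{
    int i, terms;
    double sum, old;
    if( n<=1||k==1 )
        return;
    sum = 0.0;
    terms = 0;
    for(i=(n>k ? n-k : 0); i<=n-1; i++)     /* seed window ending at x[n-1] */
    {
        sum = sum+x[i];
        terms = terms+1;
    }
    for(i=n-1; i>=0; i--)
    {
        old = x[i];
        x[i] = sum/terms;
        if( i-k>=0 )
        {
            sum = sum-old+x[i-k];           /* slide the K-point window left */
        }
        else
        {
            sum = sum-old;                  /* window shrinks near the start */
            terms = terms-1;
        }
    }
}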
32837 :
32838 :
32839 : /*************************************************************************
32840 : Filters: exponential moving averages.
32841 :
32842 : This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
32843 : defined as filter which replaces X[] by S[]:
32844 : S[0] = X[0]
32845 : S[t] = alpha*X[t] + (1-alpha)*S[t-1]
32846 :
32847 : INPUT PARAMETERS:
32848 : X - array[N], array to process. It can be larger than N,
32849 : in this case only first N points are processed.
32850 : N - points count, N>=0
32851 : alpha - 0<alpha<=1, smoothing parameter.
32852 :
32853 : OUTPUT PARAMETERS:
32854 : X - array, whose first N elements were processed
32855 : with EMA(alpha)
32856 :
32857 : NOTE 1: this function uses efficient in-place algorithm which does not
32858 : allocate temporary arrays.
32859 :
32860 : NOTE 2: this algorithm uses BOTH previous points and current one, i.e.
32861 : new value of X[i] depends on BOTH previous point and X[i] itself.
32862 :
32863 : NOTE 3: technical analysis users quite often work with the EMA coefficient
32864 : expressed in DAYS instead of fractions. If you want to calculate
32865 : EMA(N), where N is a number of days, you can use alpha=2/(N+1).
32866 :
32867 : -- ALGLIB --
32868 : Copyright 25.10.2011 by Bochkanov Sergey
32869 : *************************************************************************/
32870 0 : void filterema(/* Real */ ae_vector* x,
32871 : ae_int_t n,
32872 : double alpha,
32873 : ae_state *_state)
32874 : {
32875 : ae_int_t i;
32876 :
32877 :
32878 0 : ae_assert(n>=0, "FilterEMA: N<0", _state);
32879 0 : ae_assert(x->cnt>=n, "FilterEMA: Length(X)<N", _state);
32880 0 : ae_assert(isfinitevector(x, n, _state), "FilterEMA: X contains INF or NAN", _state);
32881 0 : ae_assert(ae_fp_greater(alpha,(double)(0)), "FilterEMA: Alpha<=0", _state);
32882 0 : ae_assert(ae_fp_less_eq(alpha,(double)(1)), "FilterEMA: Alpha>1", _state);
32883 :
32884 : /*
32885 : * Quick exit, if necessary
32886 : */
32887 0 : if( n<=1||ae_fp_eq(alpha,(double)(1)) )
32888 : {
32889 0 : return;
32890 : }
32891 :
32892 : /*
32893 : * Process
32894 : */
32895 0 : for(i=1; i<=n-1; i++)
32896 : {
32897 0 : x->ptr.p_double[i] = alpha*x->ptr.p_double[i]+(1-alpha)*x->ptr.p_double[i-1];
32898 : }
32899 : }
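/*
 * Editorial sketch (not part of ALGLIB): the EMA(alpha) recurrence
 * S[0]=X[0], S[t]=alpha*X[t]+(1-alpha)*S[t-1], applied in place just as
 * filterema() does after its argument checks. E.g. a 10-day EMA in the
 * sense of NOTE 3 above uses alpha = 2.0/(10+1), roughly 0.1818.
 */
static void ema_sketch(double *x, int n, double alpha)
{
    int i;
    for(i=1; i<=n-1; i++)
    {
        x[i] = alpha*x[i]+(1-alpha)*x[i-1];
    }
}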
32900 :
32901 :
32902 : /*************************************************************************
32903 : Filters: linear regression moving averages.
32904 :
32905 : This filter replaces array by results of LRMA(K) filter.
32906 :
32907 : LRMA(K) is defined as a filter which, for each data point, builds a linear
32908 : regression model using the K previous points (the point itself is included
32909 : in these K points) and calculates the value of this linear model at the
32910 : point in question.
32911 :
32912 : INPUT PARAMETERS:
32913 : X - array[N], array to process. It can be larger than N,
32914 : in this case only first N points are processed.
32915 : N - points count, N>=0
32916 : K - K>=1 (K can be larger than N , such cases will be
32917 : correctly handled). Window width. K=1 corresponds to
32918 : identity transformation (nothing changes).
32919 :
32920 : OUTPUT PARAMETERS:
32921 : X - array, whose first N elements were processed with LRMA(K)
32922 :
32923 : NOTE 1: unlike the SMA/EMA filters above, this function allocates small
32924 : temporary arrays (a K-by-2 matrix and a K-element weight vector)
32925 : which are reused for every point of the array.
32926 :
32927 : NOTE 2: this algorithm makes one pass through the array; for each point
32928 : it refits a linear model on the current window by calling the
32929 : LRLines solver, so per-point cost grows with the window width K
32930 : (no running-sum shortcut applies to regression averages).
32931 :
32932 : NOTE 3: this is the unsymmetric version of the algorithm, which does NOT
32933 : use points after the current one. Only X[i], X[i-1], ... are
32934 : used when calculating the new value of X[i]. Note also that the
32935 : algorithm uses BOTH previous points and the current one, i.e. the
32936 : new value of X[i] depends on BOTH the previous points and X[i] itself.
32937 :
32938 : -- ALGLIB --
32939 : Copyright 25.10.2011 by Bochkanov Sergey
32940 : *************************************************************************/
32941 0 : void filterlrma(/* Real */ ae_vector* x,
32942 : ae_int_t n,
32943 : ae_int_t k,
32944 : ae_state *_state)
32945 : {
32946 : ae_frame _frame_block;
32947 : ae_int_t i;
32948 : ae_int_t m;
32949 : ae_matrix xy;
32950 : ae_vector s;
32951 : ae_int_t info;
32952 : double a;
32953 : double b;
32954 : double vara;
32955 : double varb;
32956 : double covab;
32957 : double corrab;
32958 : double p;
32959 :
32960 0 : ae_frame_make(_state, &_frame_block);
32961 0 : memset(&xy, 0, sizeof(xy));
32962 0 : memset(&s, 0, sizeof(s));
32963 0 : ae_matrix_init(&xy, 0, 0, DT_REAL, _state, ae_true);
32964 0 : ae_vector_init(&s, 0, DT_REAL, _state, ae_true);
32965 :
32966 0 : ae_assert(n>=0, "FilterLRMA: N<0", _state);
32967 0 : ae_assert(x->cnt>=n, "FilterLRMA: Length(X)<N", _state);
32968 0 : ae_assert(isfinitevector(x, n, _state), "FilterLRMA: X contains INF or NAN", _state);
32969 0 : ae_assert(k>=1, "FilterLRMA: K<1", _state);
32970 :
32971 : /*
32972 : * Quick exit, if necessary:
32973 : * either N<=1 (nothing to average)
32974 : * * or K is 1 (only point itself is used) or 2 (model is too simple,
32975 : * we will always get identity transformation)
32976 : */
32977 0 : if( n<=1||k<=2 )
32978 : {
32979 0 : ae_frame_leave(_state);
32980 0 : return;
32981 : }
32982 :
32983 : /*
32984 : * General case: K>2, N>1.
32985 : * We do not process points with I<2 because first two points (I=0 and I=1) will be
32986 : * left unmodified by LRMA filter in any case.
32987 : */
32988 0 : ae_matrix_set_length(&xy, k, 2, _state);
32989 0 : ae_vector_set_length(&s, k, _state);
32990 0 : for(i=0; i<=k-1; i++)
32991 : {
32992 0 : xy.ptr.pp_double[i][0] = (double)(i);
32993 0 : s.ptr.p_double[i] = 1.0;
32994 : }
32995 0 : for(i=n-1; i>=2; i--)
32996 : {
32997 0 : m = ae_minint(i+1, k, _state);
32998 0 : ae_v_move(&xy.ptr.pp_double[0][1], xy.stride, &x->ptr.p_double[i-m+1], 1, ae_v_len(0,m-1));
32999 0 : lrlines(&xy, &s, m, &info, &a, &b, &vara, &varb, &covab, &corrab, &p, _state);
33000 0 : ae_assert(info==1, "FilterLRMA: internal error", _state);
33001 0 : x->ptr.p_double[i] = a+b*(m-1);
33002 : }
33003 0 : ae_frame_leave(_state);
33004 : }
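/*
 * Editorial sketch (not part of ALGLIB): one LRMA step by closed-form
 * least squares. It fits y = a + b*t over the window w[0..m-1] (with
 * t = 0..m-1) and returns the fitted value at the newest point t = m-1,
 * which is what filterlrma() obtains from lrlines(). Assumes m>=2, so
 * the denominator d below is strictly positive.
 */
static double lrma_step_sketch(const double *w, int m)
{
    int t;
    double st, sy, stt, sty, d, a, b;
    st = 0.0;
    sy = 0.0;
    stt = 0.0;
    sty = 0.0;
    for(t=0; t<=m-1; t++)
    {
        st = st+t;
        sy = sy+w[t];
        stt = stt+(double)t*t;
        sty = sty+t*w[t];
    }
    d = m*stt-st*st;            /* m*Sum(t^2)-(Sum(t))^2 > 0 for m>=2 */
    b = (m*sty-st*sy)/d;        /* slope */
    a = (sy-b*st)/m;            /* intercept */
    return a+b*(m-1);           /* value of the fitted line at the newest point */
}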
33005 :
33006 :
33007 : #endif
33008 : #if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
33009 :
33010 :
33011 : /*************************************************************************
33012 : This subroutine trains a logit model.
33013 :
33014 : INPUT PARAMETERS:
33015 : XY - training set, array[0..NPoints-1,0..NVars]
33016 : First NVars columns store values of independent
33017 : variables, next column stores number of class (from 0
33018 : to NClasses-1) which dataset element belongs to. Fractional
33019 : values are rounded to nearest integer.
33020 : NPoints - training set size, NPoints>=1
33021 : NVars - number of independent variables, NVars>=1
33022 : NClasses - number of classes, NClasses>=2
33023 :
33024 : OUTPUT PARAMETERS:
33025 : Info - return code:
33026 : * -2, if there is a point with class number
33027 : outside of [0..NClasses-1].
33028 : * -1, if incorrect parameters were passed
33029 : (NPoints<NVars+2, NVars<1, NClasses<2).
33030 : * 1, if task has been solved
33031 : LM - model built
33032 : Rep - training report
33033 :
33034 : -- ALGLIB --
33035 : Copyright 10.09.2008 by Bochkanov Sergey
33036 : *************************************************************************/
33037 0 : void mnltrainh(/* Real */ ae_matrix* xy,
33038 : ae_int_t npoints,
33039 : ae_int_t nvars,
33040 : ae_int_t nclasses,
33041 : ae_int_t* info,
33042 : logitmodel* lm,
33043 : mnlreport* rep,
33044 : ae_state *_state)
33045 : {
33046 : ae_frame _frame_block;
33047 : ae_int_t i;
33048 : ae_int_t j;
33049 : ae_int_t k;
33050 : ae_int_t ssize;
33051 : ae_bool allsame;
33052 : ae_int_t offs;
33053 : double decay;
33054 : double v;
33055 : double s;
33056 : multilayerperceptron network;
33057 : ae_int_t nin;
33058 : ae_int_t nout;
33059 : ae_int_t wcount;
33060 : double e;
33061 : ae_vector g;
33062 : ae_matrix h;
33063 : ae_bool spd;
33064 : ae_vector x;
33065 : ae_vector y;
33066 : ae_vector wbase;
33067 : double wstep;
33068 : ae_vector wdir;
33069 : ae_vector work;
33070 : ae_int_t mcstage;
33071 : logitmcstate mcstate;
33072 : ae_int_t mcinfo;
33073 : ae_int_t mcnfev;
33074 : ae_int_t solverinfo;
33075 : densesolverreport solverrep;
33076 :
33077 0 : ae_frame_make(_state, &_frame_block);
33078 0 : memset(&network, 0, sizeof(network));
33079 0 : memset(&g, 0, sizeof(g));
33080 0 : memset(&h, 0, sizeof(h));
33081 0 : memset(&x, 0, sizeof(x));
33082 0 : memset(&y, 0, sizeof(y));
33083 0 : memset(&wbase, 0, sizeof(wbase));
33084 0 : memset(&wdir, 0, sizeof(wdir));
33085 0 : memset(&work, 0, sizeof(work));
33086 0 : memset(&mcstate, 0, sizeof(mcstate));
33087 0 : memset(&solverrep, 0, sizeof(solverrep));
33088 0 : *info = 0;
33089 0 : _logitmodel_clear(lm);
33090 0 : _mnlreport_clear(rep);
33091 0 : _multilayerperceptron_init(&network, _state, ae_true);
33092 0 : ae_vector_init(&g, 0, DT_REAL, _state, ae_true);
33093 0 : ae_matrix_init(&h, 0, 0, DT_REAL, _state, ae_true);
33094 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
33095 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
33096 0 : ae_vector_init(&wbase, 0, DT_REAL, _state, ae_true);
33097 0 : ae_vector_init(&wdir, 0, DT_REAL, _state, ae_true);
33098 0 : ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
33099 0 : _logitmcstate_init(&mcstate, _state, ae_true);
33100 0 : _densesolverreport_init(&solverrep, _state, ae_true);
33101 :
33102 0 : decay = 0.001;
33103 :
33104 : /*
33105 : * Test for inputs
33106 : */
33107 0 : if( (npoints<nvars+2||nvars<1)||nclasses<2 )
33108 : {
33109 0 : *info = -1;
33110 0 : ae_frame_leave(_state);
33111 0 : return;
33112 : }
33113 0 : for(i=0; i<=npoints-1; i++)
33114 : {
33115 0 : if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
33116 : {
33117 0 : *info = -2;
33118 0 : ae_frame_leave(_state);
33119 0 : return;
33120 : }
33121 : }
33122 0 : *info = 1;
33123 :
33124 : /*
33125 : * Initialize data
33126 : */
33127 0 : rep->ngrad = 0;
33128 0 : rep->nhess = 0;
33129 :
33130 : /*
33131 : * Allocate array
33132 : */
33133 0 : offs = 5;
33134 0 : ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
33135 0 : ae_vector_set_length(&lm->w, ssize-1+1, _state);
33136 0 : lm->w.ptr.p_double[0] = (double)(ssize);
33137 0 : lm->w.ptr.p_double[1] = (double)(logit_logitvnum);
33138 0 : lm->w.ptr.p_double[2] = (double)(nvars);
33139 0 : lm->w.ptr.p_double[3] = (double)(nclasses);
33140 0 : lm->w.ptr.p_double[4] = (double)(offs);
33141 :
33142 : /*
33143 : * Degenerate case: all outputs are equal
33144 : */
33145 0 : allsame = ae_true;
33146 0 : for(i=1; i<=npoints-1; i++)
33147 : {
33148 0 : if( ae_round(xy->ptr.pp_double[i][nvars], _state)!=ae_round(xy->ptr.pp_double[i-1][nvars], _state) )
33149 : {
33150 0 : allsame = ae_false;
33151 : }
33152 : }
33153 0 : if( allsame )
33154 : {
33155 0 : for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
33156 : {
33157 0 : lm->w.ptr.p_double[offs+i] = (double)(0);
33158 : }
33159 0 : v = -2*ae_log(ae_minrealnumber, _state);
33160 0 : k = ae_round(xy->ptr.pp_double[0][nvars], _state);
33161 0 : if( k==nclasses-1 )
33162 : {
33163 0 : for(i=0; i<=nclasses-2; i++)
33164 : {
33165 0 : lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = -v;
33166 : }
33167 : }
33168 : else
33169 : {
33170 0 : for(i=0; i<=nclasses-2; i++)
33171 : {
33172 0 : if( i==k )
33173 : {
33174 0 : lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = v;
33175 : }
33176 : else
33177 : {
33178 0 : lm->w.ptr.p_double[offs+i*(nvars+1)+nvars] = (double)(0);
33179 : }
33180 : }
33181 : }
33182 0 : ae_frame_leave(_state);
33183 0 : return;
33184 : }
33185 :
33186 : /*
33187 : * General case.
33188 : * Prepare task and network. Allocate space.
33189 : */
33190 0 : mlpcreatec0(nvars, nclasses, &network, _state);
33191 0 : mlpinitpreprocessor(&network, xy, npoints, _state);
33192 0 : mlpproperties(&network, &nin, &nout, &wcount, _state);
33193 0 : for(i=0; i<=wcount-1; i++)
33194 : {
33195 0 : network.weights.ptr.p_double[i] = (2*ae_randomreal(_state)-1)/nvars;
33196 : }
33197 0 : ae_vector_set_length(&g, wcount-1+1, _state);
33198 0 : ae_matrix_set_length(&h, wcount-1+1, wcount-1+1, _state);
33199 0 : ae_vector_set_length(&wbase, wcount-1+1, _state);
33200 0 : ae_vector_set_length(&wdir, wcount-1+1, _state);
33201 0 : ae_vector_set_length(&work, wcount-1+1, _state);
33202 :
33203 : /*
33204 : * First stage: optimize in gradient direction.
33205 : */
33206 0 : for(k=0; k<=wcount/3+10; k++)
33207 : {
33208 :
33209 : /*
33210 : * Calculate gradient in starting point
33211 : */
33212 0 : mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
33213 0 : v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33214 0 : e = e+0.5*decay*v;
33215 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
33216 0 : rep->ngrad = rep->ngrad+1;
33217 :
33218 : /*
33219 : * Setup optimization scheme
33220 : */
33221 0 : ae_v_moveneg(&wdir.ptr.p_double[0], 1, &g.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33222 0 : v = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33223 0 : wstep = ae_sqrt(v, _state);
33224 0 : v = 1/ae_sqrt(v, _state);
33225 0 : ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), v);
33226 0 : mcstage = 0;
33227 0 : logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
33228 0 : while(mcstage!=0)
33229 : {
33230 0 : mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
33231 0 : v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33232 0 : e = e+0.5*decay*v;
33233 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
33234 0 : rep->ngrad = rep->ngrad+1;
33235 0 : logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
33236 : }
33237 : }
33238 :
33239 : /*
33240 : * Second stage: use Hessian when we are close to the minimum
33241 : */
33242 : for(;;)
33243 : {
33244 :
33245 : /*
33246 : * Calculate and update E/G/H
33247 : */
33248 0 : mlphessiannbatch(&network, xy, npoints, &e, &g, &h, _state);
33249 0 : v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33250 0 : e = e+0.5*decay*v;
33251 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
33252 0 : for(k=0; k<=wcount-1; k++)
33253 : {
33254 0 : h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
33255 : }
33256 0 : rep->nhess = rep->nhess+1;
33257 :
33258 : /*
33259 : * Select step direction
33260 : * NOTE: it is important to use lower-triangle Cholesky
33261 : * factorization since it is much faster than higher-triangle version.
33262 : */
33263 0 : spd = spdmatrixcholesky(&h, wcount, ae_false, _state);
33264 0 : spdmatrixcholeskysolve(&h, wcount, ae_false, &g, &solverinfo, &solverrep, &wdir, _state);
33265 0 : spd = solverinfo>0;
33266 0 : if( spd )
33267 : {
33268 :
33269 : /*
33270 : * H is positive definite.
33271 : * Step in Newton direction.
33272 : */
33273 0 : ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), -1);
33274 0 : spd = ae_true;
33275 : }
33276 : else
33277 : {
33278 :
33279 : /*
33280 : * H is indefinite.
33281 : * Step in gradient direction.
33282 : */
33283 0 : ae_v_moveneg(&wdir.ptr.p_double[0], 1, &g.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33284 0 : spd = ae_false;
33285 : }
33286 :
33287 : /*
33288 : * Optimize in WDir direction
33289 : */
33290 0 : v = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33291 0 : wstep = ae_sqrt(v, _state);
33292 0 : v = 1/ae_sqrt(v, _state);
33293 0 : ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), v);
33294 0 : mcstage = 0;
33295 0 : logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
33296 0 : while(mcstage!=0)
33297 : {
33298 0 : mlpgradnbatch(&network, xy, npoints, &e, &g, _state);
33299 0 : v = ae_v_dotproduct(&network.weights.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
33300 0 : e = e+0.5*decay*v;
33301 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
33302 0 : rep->ngrad = rep->ngrad+1;
33303 0 : logit_mnlmcsrch(wcount, &network.weights, &e, &g, &wdir, &wstep, &mcinfo, &mcnfev, &work, &mcstate, &mcstage, _state);
33304 : }
33305 0 : if( spd&&((mcinfo==2||mcinfo==4)||mcinfo==6) )
33306 : {
33307 : break;
33308 : }
33309 : }
33310 :
33311 : /*
33312 : * Convert from NN format to MNL format
33313 : */
33314 0 : ae_v_move(&lm->w.ptr.p_double[offs], 1, &network.weights.ptr.p_double[0], 1, ae_v_len(offs,offs+wcount-1));
33315 0 : for(k=0; k<=nvars-1; k++)
33316 : {
33317 0 : for(i=0; i<=nclasses-2; i++)
33318 : {
33319 0 : s = network.columnsigmas.ptr.p_double[k];
33320 0 : if( ae_fp_eq(s,(double)(0)) )
33321 : {
33322 0 : s = (double)(1);
33323 : }
33324 0 : j = offs+(nvars+1)*i;
33325 0 : v = lm->w.ptr.p_double[j+k];
33326 0 : lm->w.ptr.p_double[j+k] = v/s;
33327 0 : lm->w.ptr.p_double[j+nvars] = lm->w.ptr.p_double[j+nvars]+v*network.columnmeans.ptr.p_double[k]/s;
33328 : }
33329 : }
33330 0 : for(k=0; k<=nclasses-2; k++)
33331 : {
33332 0 : lm->w.ptr.p_double[offs+(nvars+1)*k+nvars] = -lm->w.ptr.p_double[offs+(nvars+1)*k+nvars];
33333 : }
33334 0 : ae_frame_leave(_state);
33335 : }
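/*
 * Editorial sketch (not part of ALGLIB): the Tikhonov regularization used
 * in both optimization stages above. The trainer effectively minimizes
 * E(W) = CrossEntropy(W) + 0.5*Decay*W'*W, so after every
 * mlpgradnbatch()/mlphessiannbatch() call it adds 0.5*decay*w'w to the
 * energy and decay*w to the gradient (the Hessian stage additionally adds
 * Decay to the diagonal of H), exactly as this helper does.
 */
static void add_decay_sketch(double *e, double *g, const double *w,
     int n, double decay)
{
    int i;
    double v;
    v = 0.0;
    for(i=0; i<=n-1; i++)
    {
        v = v+w[i]*w[i];        /* accumulate w'w */
        g[i] = g[i]+decay*w[i]; /* gradient of the quadratic penalty */
    }
    *e = *e+0.5*decay*v;
}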
33336 :
33337 :
33338 : /*************************************************************************
33339 : Processing
33340 :
33341 : INPUT PARAMETERS:
33342 : LM - logit model, passed by non-constant reference
33343 : (some fields of structure are used as temporaries
33344 : when calculating model output).
33345 : X - input vector, array[0..NVars-1].
33346 : Y - (possibly) preallocated buffer; if the size of Y is less than
33347 : NClasses, it will be reallocated. If it is large enough, it
33348 : is NOT reallocated, which saves some time on reallocation.
33349 :
33350 : OUTPUT PARAMETERS:
33351 : Y - result, array[0..NClasses-1]
33352 : Vector of posterior probabilities for classification task.
33353 :
33354 : -- ALGLIB --
33355 : Copyright 10.09.2008 by Bochkanov Sergey
33356 : *************************************************************************/
33357 0 : void mnlprocess(logitmodel* lm,
33358 : /* Real */ ae_vector* x,
33359 : /* Real */ ae_vector* y,
33360 : ae_state *_state)
33361 : {
33362 : ae_int_t nvars;
33363 : ae_int_t nclasses;
33364 : ae_int_t offs;
33365 : ae_int_t i;
33366 : ae_int_t i1;
33367 : double s;
33368 :
33369 :
33370 0 : ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLProcess: unexpected model version", _state);
33371 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
33372 0 : nclasses = ae_round(lm->w.ptr.p_double[3], _state);
33373 0 : offs = ae_round(lm->w.ptr.p_double[4], _state);
33374 0 : logit_mnliexp(&lm->w, x, _state);
33375 0 : s = (double)(0);
33376 0 : i1 = offs+(nvars+1)*(nclasses-1);
33377 0 : for(i=i1; i<=i1+nclasses-1; i++)
33378 : {
33379 0 : s = s+lm->w.ptr.p_double[i];
33380 : }
33381 0 : if( y->cnt<nclasses )
33382 : {
33383 0 : ae_vector_set_length(y, nclasses, _state);
33384 : }
33385 0 : for(i=0; i<=nclasses-1; i++)
33386 : {
33387 0 : y->ptr.p_double[i] = lm->w.ptr.p_double[i1+i]/s;
33388 : }
33389 0 : }
33390 :
33391 :
33392 : /*************************************************************************
33393 : 'interactive' variant of MNLProcess for languages like Python which
33394 : support constructs like "Y = MNLProcess(LM,X)" and interactive mode of
33395 : the interpreter.
33396 :
33397 : This function allocates new array on each call, so it is significantly
33398 : slower than its 'non-interactive' counterpart, but it is more convenient
33399 : when you call it from command line.
33400 :
33401 : -- ALGLIB --
33402 : Copyright 10.09.2008 by Bochkanov Sergey
33403 : *************************************************************************/
33404 0 : void mnlprocessi(logitmodel* lm,
33405 : /* Real */ ae_vector* x,
33406 : /* Real */ ae_vector* y,
33407 : ae_state *_state)
33408 : {
33409 :
33410 0 : ae_vector_clear(y);
33411 :
33412 0 : mnlprocess(lm, x, y, _state);
33413 0 : }
33414 :
33415 :
33416 : /*************************************************************************
33417 : Unpacks coefficients of the logit model. The logit model has the form:
33418 :
33419 : P(class=i) = S(i) / (S(0) + S(1) + ... +S(M-1))
33420 : S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
33421 : S(M-1) = 1
33422 :
33423 : INPUT PARAMETERS:
33424 : LM - logit model in ALGLIB format
33425 :
33426 : OUTPUT PARAMETERS:
33427 : V - coefficients, array[0..NClasses-2,0..NVars]
33428 : NVars - number of independent variables
33429 : NClasses - number of classes
33430 :
33431 : -- ALGLIB --
33432 : Copyright 10.09.2008 by Bochkanov Sergey
33433 : *************************************************************************/
33434 0 : void mnlunpack(logitmodel* lm,
33435 : /* Real */ ae_matrix* a,
33436 : ae_int_t* nvars,
33437 : ae_int_t* nclasses,
33438 : ae_state *_state)
33439 : {
33440 : ae_int_t offs;
33441 : ae_int_t i;
33442 :
33443 0 : ae_matrix_clear(a);
33444 0 : *nvars = 0;
33445 0 : *nclasses = 0;
33446 :
33447 0 : ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLUnpack: unexpected model version", _state);
33448 0 : *nvars = ae_round(lm->w.ptr.p_double[2], _state);
33449 0 : *nclasses = ae_round(lm->w.ptr.p_double[3], _state);
33450 0 : offs = ae_round(lm->w.ptr.p_double[4], _state);
33451 0 : ae_matrix_set_length(a, *nclasses-2+1, *nvars+1, _state);
33452 0 : for(i=0; i<=*nclasses-2; i++)
33453 : {
33454 0 : ae_v_move(&a->ptr.pp_double[i][0], 1, &lm->w.ptr.p_double[offs+i*(*nvars+1)], 1, ae_v_len(0,*nvars));
33455 : }
33456 0 : }
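/*
 * Editorial sketch (not part of ALGLIB): evaluating the unpacked model in
 * exactly the documented form above. A is the array[NClasses-1,NVars+1]
 * coefficient matrix returned by mnlunpack() (stored row-major here for
 * simplicity), X the input and P the output probabilities. The max-shift
 * before exp() is the same anti-overflow trick used by logit_mnliexp().
 */
#include <math.h>

static void mnl_eval_sketch(const double *a, int nvars, int nclasses,
     const double *x, double *p)
{
    int i, j;
    double v, mx, s;
    mx = 0.0;
    for(i=0; i<=nclasses-2; i++)
    {
        v = a[i*(nvars+1)+nvars];           /* intercept A[i,NVars] */
        for(j=0; j<=nvars-1; j++)
        {
            v = v+a[i*(nvars+1)+j]*x[j];
        }
        p[i] = v;
        if( v>mx )
        {
            mx = v;
        }
    }
    p[nclasses-1] = 0.0;                    /* S(NClasses-1) = exp(0) = 1 */
    s = 0.0;
    for(i=0; i<=nclasses-1; i++)
    {
        p[i] = exp(p[i]-mx);                /* shift cancels in the ratio */
        s = s+p[i];
    }
    for(i=0; i<=nclasses-1; i++)
    {
        p[i] = p[i]/s;
    }
}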
33457 :
33458 :
33459 : /*************************************************************************
33460 : "Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
33461 : reversed).
33462 :
33463 : INPUT PARAMETERS:
33464 : A - model (see MNLUnpack)
33465 : NVars - number of independent variables
33466 : NClasses - number of classes
33467 :
33468 : OUTPUT PARAMETERS:
33469 : LM - logit model.
33470 :
33471 : -- ALGLIB --
33472 : Copyright 10.09.2008 by Bochkanov Sergey
33473 : *************************************************************************/
33474 0 : void mnlpack(/* Real */ ae_matrix* a,
33475 : ae_int_t nvars,
33476 : ae_int_t nclasses,
33477 : logitmodel* lm,
33478 : ae_state *_state)
33479 : {
33480 : ae_int_t offs;
33481 : ae_int_t i;
33482 : ae_int_t ssize;
33483 :
33484 0 : _logitmodel_clear(lm);
33485 :
33486 0 : offs = 5;
33487 0 : ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
33488 0 : ae_vector_set_length(&lm->w, ssize-1+1, _state);
33489 0 : lm->w.ptr.p_double[0] = (double)(ssize);
33490 0 : lm->w.ptr.p_double[1] = (double)(logit_logitvnum);
33491 0 : lm->w.ptr.p_double[2] = (double)(nvars);
33492 0 : lm->w.ptr.p_double[3] = (double)(nclasses);
33493 0 : lm->w.ptr.p_double[4] = (double)(offs);
33494 0 : for(i=0; i<=nclasses-2; i++)
33495 : {
33496 0 : ae_v_move(&lm->w.ptr.p_double[offs+i*(nvars+1)], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(offs+i*(nvars+1),offs+i*(nvars+1)+nvars));
33497 : }
33498 0 : }
33499 :
33500 :
33501 : /*************************************************************************
33502 : Copying of the LogitModel structure
33503 :
33504 : INPUT PARAMETERS:
33505 : LM1 - original
33506 :
33507 : OUTPUT PARAMETERS:
33508 : LM2 - copy
33509 :
33510 : -- ALGLIB --
33511 : Copyright 15.03.2009 by Bochkanov Sergey
33512 : *************************************************************************/
33513 0 : void mnlcopy(logitmodel* lm1, logitmodel* lm2, ae_state *_state)
33514 : {
33515 : ae_int_t k;
33516 :
33517 0 : _logitmodel_clear(lm2);
33518 :
33519 0 : k = ae_round(lm1->w.ptr.p_double[0], _state);
33520 0 : ae_vector_set_length(&lm2->w, k-1+1, _state);
33521 0 : ae_v_move(&lm2->w.ptr.p_double[0], 1, &lm1->w.ptr.p_double[0], 1, ae_v_len(0,k-1));
33522 0 : }
33523 :
33524 :
33525 : /*************************************************************************
33526 : Average cross-entropy (in bits per element) on the test set
33527 :
33528 : INPUT PARAMETERS:
33529 : LM - logit model
33530 : XY - test set
33531 : NPoints - test set size
33532 :
33533 : RESULT:
33534 : CrossEntropy/(NPoints*ln(2)).
33535 :
33536 : -- ALGLIB --
33537 : Copyright 10.09.2008 by Bochkanov Sergey
33538 : *************************************************************************/
33539 0 : double mnlavgce(logitmodel* lm,
33540 : /* Real */ ae_matrix* xy,
33541 : ae_int_t npoints,
33542 : ae_state *_state)
33543 : {
33544 : ae_frame _frame_block;
33545 : ae_int_t nvars;
33546 : ae_int_t nclasses;
33547 : ae_int_t i;
33548 : ae_vector workx;
33549 : ae_vector worky;
33550 : double result;
33551 :
33552 0 : ae_frame_make(_state, &_frame_block);
33553 0 : memset(&workx, 0, sizeof(workx));
33554 0 : memset(&worky, 0, sizeof(worky));
33555 0 : ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
33556 0 : ae_vector_init(&worky, 0, DT_REAL, _state, ae_true);
33557 :
33558 0 : ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLAvgCE: unexpected model version", _state);
33559 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
33560 0 : nclasses = ae_round(lm->w.ptr.p_double[3], _state);
33561 0 : ae_vector_set_length(&workx, nvars-1+1, _state);
33562 0 : ae_vector_set_length(&worky, nclasses-1+1, _state);
33563 0 : result = (double)(0);
33564 0 : for(i=0; i<=npoints-1; i++)
33565 : {
33566 0 : ae_assert(ae_round(xy->ptr.pp_double[i][nvars], _state)>=0&&ae_round(xy->ptr.pp_double[i][nvars], _state)<nclasses, "MNLAvgCE: incorrect class number!", _state);
33567 :
33568 : /*
33569 : * Process
33570 : */
33571 0 : ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
33572 0 : mnlprocess(lm, &workx, &worky, _state);
33573 0 : if( ae_fp_greater(worky.ptr.p_double[ae_round(xy->ptr.pp_double[i][nvars], _state)],(double)(0)) )
33574 : {
33575 0 : result = result-ae_log(worky.ptr.p_double[ae_round(xy->ptr.pp_double[i][nvars], _state)], _state);
33576 : }
33577 : else
33578 : {
33579 0 : result = result-ae_log(ae_minrealnumber, _state);
33580 : }
33581 : }
33582 0 : result = result/(npoints*ae_log((double)(2), _state));
33583 0 : ae_frame_leave(_state);
33584 0 : return result;
33585 : }
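/*
 * Editorial sketch (not part of ALGLIB): the metric computed above is the
 * mean negative log-likelihood of the true class, converted from nats to
 * bits by dividing by ln(2). Here p[i] is the probability the model
 * assigned to the true class of sample i; mnlavgce() clamps non-positive
 * probabilities to ae_minrealnumber before taking the logarithm.
 */
#include <math.h>

static double avgce_bits_sketch(const double *p, int n)
{
    int i;
    double s;
    s = 0.0;
    for(i=0; i<=n-1; i++)
    {
        s = s-log(p[i]);
    }
    return s/(n*log(2.0));
}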
33586 :
33587 :
33588 : /*************************************************************************
33589 : Relative classification error on the test set
33590 :
33591 : INPUT PARAMETERS:
33592 : LM - logit model
33593 : XY - test set
33594 : NPoints - test set size
33595 :
33596 : RESULT:
33597 : percent of incorrectly classified cases.
33598 :
33599 : -- ALGLIB --
33600 : Copyright 10.09.2008 by Bochkanov Sergey
33601 : *************************************************************************/
33602 0 : double mnlrelclserror(logitmodel* lm,
33603 : /* Real */ ae_matrix* xy,
33604 : ae_int_t npoints,
33605 : ae_state *_state)
33606 : {
33607 : double result;
33608 :
33609 :
33610 0 : result = (double)mnlclserror(lm, xy, npoints, _state)/(double)npoints;
33611 0 : return result;
33612 : }
33613 :
33614 :
33615 : /*************************************************************************
33616 : RMS error on the test set
33617 :
33618 : INPUT PARAMETERS:
33619 : LM - logit model
33620 : XY - test set
33621 : NPoints - test set size
33622 :
33623 : RESULT:
33624 : root mean square error (error when estimating posterior probabilities).
33625 :
33626 : -- ALGLIB --
33627 : Copyright 30.08.2008 by Bochkanov Sergey
33628 : *************************************************************************/
33629 0 : double mnlrmserror(logitmodel* lm,
33630 : /* Real */ ae_matrix* xy,
33631 : ae_int_t npoints,
33632 : ae_state *_state)
33633 : {
33634 : double relcls;
33635 : double avgce;
33636 : double rms;
33637 : double avg;
33638 : double avgrel;
33639 : double result;
33640 :
33641 :
33642 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLRMSError: Incorrect MNL version!", _state);
33643 0 : logit_mnlallerrors(lm, xy, npoints, &relcls, &avgce, &rms, &avg, &avgrel, _state);
33644 0 : result = rms;
33645 0 : return result;
33646 : }
33647 :
33648 :
33649 : /*************************************************************************
33650 : Average error on the test set
33651 :
33652 : INPUT PARAMETERS:
33653 : LM - logit model
33654 : XY - test set
33655 : NPoints - test set size
33656 :
33657 : RESULT:
33658 : average error (error when estimating posterior probabilities).
33659 :
33660 : -- ALGLIB --
33661 : Copyright 30.08.2008 by Bochkanov Sergey
33662 : *************************************************************************/
33663 0 : double mnlavgerror(logitmodel* lm,
33664 : /* Real */ ae_matrix* xy,
33665 : ae_int_t npoints,
33666 : ae_state *_state)
33667 : {
33668 : double relcls;
33669 : double avgce;
33670 : double rms;
33671 : double avg;
33672 : double avgrel;
33673 : double result;
33674 :
33675 :
33676 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLAvgError: Incorrect MNL version!", _state);
33677 0 : logit_mnlallerrors(lm, xy, npoints, &relcls, &avgce, &rms, &avg, &avgrel, _state);
33678 0 : result = avg;
33679 0 : return result;
33680 : }
33681 :
33682 :
33683 : /*************************************************************************
33684 : Average relative error on the test set
33685 :
33686 : INPUT PARAMETERS:
33687 : LM - logit model
33688 : XY - test set
33689 : NPoints - test set size
33690 :
33691 : RESULT:
33692 : average relative error (error when estimating posterior probabilities).
33693 :
33694 : -- ALGLIB --
33695 : Copyright 30.08.2008 by Bochkanov Sergey
33696 : *************************************************************************/
33697 0 : double mnlavgrelerror(logitmodel* lm,
33698 : /* Real */ ae_matrix* xy,
33699 : ae_int_t ssize,
33700 : ae_state *_state)
33701 : {
33702 : double relcls;
33703 : double avgce;
33704 : double rms;
33705 : double avg;
33706 : double avgrel;
33707 : double result;
33708 :
33709 :
33710 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNLAvgRelError: Incorrect MNL version!", _state);
33711 0 : logit_mnlallerrors(lm, xy, ssize, &relcls, &avgce, &rms, &avg, &avgrel, _state);
33712 0 : result = avgrel;
33713 0 : return result;
33714 : }
33715 :
33716 :
33717 : /*************************************************************************
33718 : Classification error on test set = MNLRelClsError*NPoints
33719 :
33720 : -- ALGLIB --
33721 : Copyright 10.09.2008 by Bochkanov Sergey
33722 : *************************************************************************/
33723 0 : ae_int_t mnlclserror(logitmodel* lm,
33724 : /* Real */ ae_matrix* xy,
33725 : ae_int_t npoints,
33726 : ae_state *_state)
33727 : {
33728 : ae_frame _frame_block;
33729 : ae_int_t nvars;
33730 : ae_int_t nclasses;
33731 : ae_int_t i;
33732 : ae_int_t j;
33733 : ae_vector workx;
33734 : ae_vector worky;
33735 : ae_int_t nmax;
33736 : ae_int_t result;
33737 :
33738 0 : ae_frame_make(_state, &_frame_block);
33739 0 : memset(&workx, 0, sizeof(workx));
33740 0 : memset(&worky, 0, sizeof(worky));
33741 0 : ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
33742 0 : ae_vector_init(&worky, 0, DT_REAL, _state, ae_true);
33743 :
33744 0 : ae_assert(ae_fp_eq(lm->w.ptr.p_double[1],(double)(logit_logitvnum)), "MNLClsError: unexpected model version", _state);
33745 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
33746 0 : nclasses = ae_round(lm->w.ptr.p_double[3], _state);
33747 0 : ae_vector_set_length(&workx, nvars-1+1, _state);
33748 0 : ae_vector_set_length(&worky, nclasses-1+1, _state);
33749 0 : result = 0;
33750 0 : for(i=0; i<=npoints-1; i++)
33751 : {
33752 :
33753 : /*
33754 : * Process
33755 : */
33756 0 : ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
33757 0 : mnlprocess(lm, &workx, &worky, _state);
33758 :
33759 : /*
33760 : * Logit version of the answer
33761 : */
33762 0 : nmax = 0;
33763 0 : for(j=0; j<=nclasses-1; j++)
33764 : {
33765 0 : if( ae_fp_greater(worky.ptr.p_double[j],worky.ptr.p_double[nmax]) )
33766 : {
33767 0 : nmax = j;
33768 : }
33769 : }
33770 :
33771 : /*
33772 : * compare
33773 : */
33774 0 : if( nmax!=ae_round(xy->ptr.pp_double[i][nvars], _state) )
33775 : {
33776 0 : result = result+1;
33777 : }
33778 : }
33779 0 : ae_frame_leave(_state);
33780 0 : return result;
33781 : }
33782 :
33783 :
33784 : /*************************************************************************
33785 : Internal subroutine. Computes the internal linear outputs, shifts them to
33786 : prevent exp() overflow and stores their exponents in the service part of the W array.
33787 : *************************************************************************/
33788 0 : static void logit_mnliexp(/* Real */ ae_vector* w,
33789 : /* Real */ ae_vector* x,
33790 : ae_state *_state)
33791 : {
33792 : ae_int_t nvars;
33793 : ae_int_t nclasses;
33794 : ae_int_t offs;
33795 : ae_int_t i;
33796 : ae_int_t i1;
33797 : double v;
33798 : double mx;
33799 :
33800 :
33801 0 : ae_assert(ae_fp_eq(w->ptr.p_double[1],(double)(logit_logitvnum)), "LOGIT: unexpected model version", _state);
33802 0 : nvars = ae_round(w->ptr.p_double[2], _state);
33803 0 : nclasses = ae_round(w->ptr.p_double[3], _state);
33804 0 : offs = ae_round(w->ptr.p_double[4], _state);
33805 0 : i1 = offs+(nvars+1)*(nclasses-1);
33806 0 : for(i=0; i<=nclasses-2; i++)
33807 : {
33808 0 : v = ae_v_dotproduct(&w->ptr.p_double[offs+i*(nvars+1)], 1, &x->ptr.p_double[0], 1, ae_v_len(offs+i*(nvars+1),offs+i*(nvars+1)+nvars-1));
33809 0 : w->ptr.p_double[i1+i] = v+w->ptr.p_double[offs+i*(nvars+1)+nvars];
33810 : }
33811 0 : w->ptr.p_double[i1+nclasses-1] = (double)(0);
33812 0 : mx = (double)(0);
33813 0 : for(i=i1; i<=i1+nclasses-1; i++)
33814 : {
33815 0 : mx = ae_maxreal(mx, w->ptr.p_double[i], _state);
33816 : }
33817 0 : for(i=i1; i<=i1+nclasses-1; i++)
33818 : {
33819 0 : w->ptr.p_double[i] = ae_exp(w->ptr.p_double[i]-mx, _state);
33820 : }
33821 0 : }
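/*
 * Editorial numeric illustration of the shift above: with raw linear
 * outputs {1000, 999, 0}, exp() would overflow; after subtracting
 * mx = 1000 the stored terms become {1, exp(-1), exp(-1000)}. Since the
 * shift cancels in the ratio later computed by mnlprocess(), the
 * resulting posterior probabilities are unchanged.
 */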
33822 :
33823 :
33824 : /*************************************************************************
33825 : Calculation of all types of errors
33826 :
33827 : -- ALGLIB --
33828 : Copyright 30.08.2008 by Bochkanov Sergey
33829 : *************************************************************************/
33830 0 : static void logit_mnlallerrors(logitmodel* lm,
33831 : /* Real */ ae_matrix* xy,
33832 : ae_int_t npoints,
33833 : double* relcls,
33834 : double* avgce,
33835 : double* rms,
33836 : double* avg,
33837 : double* avgrel,
33838 : ae_state *_state)
33839 : {
33840 : ae_frame _frame_block;
33841 : ae_int_t nvars;
33842 : ae_int_t nclasses;
33843 : ae_int_t i;
33844 : ae_vector buf;
33845 : ae_vector workx;
33846 : ae_vector y;
33847 : ae_vector dy;
33848 :
33849 0 : ae_frame_make(_state, &_frame_block);
33850 0 : memset(&buf, 0, sizeof(buf));
33851 0 : memset(&workx, 0, sizeof(workx));
33852 0 : memset(&y, 0, sizeof(y));
33853 0 : memset(&dy, 0, sizeof(dy));
33854 0 : *relcls = 0;
33855 0 : *avgce = 0;
33856 0 : *rms = 0;
33857 0 : *avg = 0;
33858 0 : *avgrel = 0;
33859 0 : ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
33860 0 : ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
33861 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
33862 0 : ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
33863 :
33864 0 : ae_assert(ae_round(lm->w.ptr.p_double[1], _state)==logit_logitvnum, "MNL unit: Incorrect MNL version!", _state);
33865 0 : nvars = ae_round(lm->w.ptr.p_double[2], _state);
33866 0 : nclasses = ae_round(lm->w.ptr.p_double[3], _state);
33867 0 : ae_vector_set_length(&workx, nvars-1+1, _state);
33868 0 : ae_vector_set_length(&y, nclasses-1+1, _state);
33869 0 : ae_vector_set_length(&dy, 0+1, _state);
33870 0 : dserrallocate(nclasses, &buf, _state);
33871 0 : for(i=0; i<=npoints-1; i++)
33872 : {
33873 0 : ae_v_move(&workx.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
33874 0 : mnlprocess(lm, &workx, &y, _state);
33875 0 : dy.ptr.p_double[0] = xy->ptr.pp_double[i][nvars];
33876 0 : dserraccumulate(&buf, &y, &dy, _state);
33877 : }
33878 0 : dserrfinish(&buf, _state);
33879 0 : *relcls = buf.ptr.p_double[0];
33880 0 : *avgce = buf.ptr.p_double[1];
33881 0 : *rms = buf.ptr.p_double[2];
33882 0 : *avg = buf.ptr.p_double[3];
33883 0 : *avgrel = buf.ptr.p_double[4];
33884 0 : ae_frame_leave(_state);
33885 0 : }
33886 :
33887 :
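/*
 * Editorial sketch (not part of ALGLIB): the two acceptance tests of the
 * More-Thuente line search described below (logit_mnlmcsrch). With f0/d0
 * the function value and directional derivative at step 0, and f1/d1 the
 * same quantities at step stp, INFO=1 below corresponds to both conditions
 * holding for a descent direction (d0<0).
 */
#include <math.h>

static int wolfe_ok_sketch(double f0, double d0, double f1, double d1,
     double stp, double ftol, double gtol)
{
    int sufficient_decrease, curvature;
    sufficient_decrease = f1<=f0+ftol*stp*d0;   /* sufficient decrease test */
    curvature = fabs(d1)<=gtol*fabs(d0);        /* curvature test */
    return sufficient_decrease&&curvature;
}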
33888 : /*************************************************************************
33889 : THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT
33890 : DECREASE CONDITION AND A CURVATURE CONDITION.
33891 :
33892 : AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH
33893 : ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
33894 : SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION
33895 :
33896 : F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).
33897 :
33898 : IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
33899 : FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF
33900 : UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).
33901 :
33902 : THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
33903 : DECREASE CONDITION
33904 :
33905 : F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),
33906 :
33907 : AND THE CURVATURE CONDITION
33908 :
33909 : ABS(GRADF(X+STP*S)'S)) .LE. GTOL*ABS(GRADF(X)'S).
33910 :
33911 : IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
33912 : BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS.
33913 : IF NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE
33914 : ALGORITHM USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS.
33915 : IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.
33916 :
33917 : PARAMETERS DESCRIPTION
33918 :
33919 : N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.
33920 :
33921 : X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
33922 : THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.
33923 :
33924 : F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
33925 : IT CONTAINS THE VALUE OF F AT X + STP*S.
33926 :
33927 : G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
33928 : ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.
33929 :
33930 : S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.
33931 :
33932 : STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
33933 : OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.
33934 :
33935 : FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
33936 : SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
33937 : SATISFIED.
33938 :
33939 : XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
33940 : WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.
33941 :
33942 : STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND
33943 : UPPER BOUNDS FOR THE STEP.
33944 :
33945 : MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
33946 : NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.
33947 :
33948 : INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
33949 : INFO = 0 IMPROPER INPUT PARAMETERS.
33950 :
33951 : INFO = 1 THE SUFFICIENT DECREASE CONDITION AND THE
33952 : DIRECTIONAL DERIVATIVE CONDITION HOLD.
33953 :
33954 : INFO = 2 RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
33955 : IS AT MOST XTOL.
33956 :
33957 : INFO = 3 NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.
33958 :
33959 : INFO = 4 THE STEP IS AT THE LOWER BOUND STPMIN.
33960 :
33961 : INFO = 5 THE STEP IS AT THE UPPER BOUND STPMAX.
33962 :
33963 : INFO = 6 ROUNDING ERRORS PREVENT FURTHER PROGRESS.
33964 : THERE MAY NOT BE A STEP WHICH SATISFIES THE
33965 : SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
33966 : TOLERANCES MAY BE TOO SMALL.
33967 :
33968 : NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.
33969 :
33970 : WA IS A WORK ARRAY OF LENGTH N.
33971 :
33972 : ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
33973 : JORGE J. MORE', DAVID J. THUENTE
33974 : *************************************************************************/
33975 0 : static void logit_mnlmcsrch(ae_int_t n,
33976 : /* Real */ ae_vector* x,
33977 : double* f,
33978 : /* Real */ ae_vector* g,
33979 : /* Real */ ae_vector* s,
33980 : double* stp,
33981 : ae_int_t* info,
33982 : ae_int_t* nfev,
33983 : /* Real */ ae_vector* wa,
33984 : logitmcstate* state,
33985 : ae_int_t* stage,
33986 : ae_state *_state)
33987 : {
33988 : double v;
33989 : double p5;
33990 : double p66;
33991 : double zero;
33992 :
33993 :
33994 :
33995 : /*
33996 : * init
33997 : */
33998 0 : p5 = 0.5;
33999 0 : p66 = 0.66;
34000 0 : state->xtrapf = 4.0;
34001 0 : zero = (double)(0);
34002 :
34003 : /*
34004 : * Main cycle
34005 : */
34006 : for(;;)
34007 : {
34008 0 : if( *stage==0 )
34009 : {
34010 :
34011 : /*
34012 : * NEXT
34013 : */
34014 0 : *stage = 2;
34015 0 : continue;
34016 : }
34017 0 : if( *stage==2 )
34018 : {
34019 0 : state->infoc = 1;
34020 0 : *info = 0;
34021 :
34022 : /*
34023 : * CHECK THE INPUT PARAMETERS FOR ERRORS.
34024 : */
34025 0 : if( ((((((n<=0||ae_fp_less_eq(*stp,(double)(0)))||ae_fp_less(logit_ftol,(double)(0)))||ae_fp_less(logit_gtol,zero))||ae_fp_less(logit_xtol,zero))||ae_fp_less(logit_stpmin,zero))||ae_fp_less(logit_stpmax,logit_stpmin))||logit_maxfev<=0 )
34026 : {
34027 0 : *stage = 0;
34028 0 : return;
34029 : }
34030 :
34031 : /*
34032 : * COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
34033 : * AND CHECK THAT S IS A DESCENT DIRECTION.
34034 : */
34035 0 : v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
34036 0 : state->dginit = v;
34037 0 : if( ae_fp_greater_eq(state->dginit,(double)(0)) )
34038 : {
34039 0 : *stage = 0;
34040 0 : return;
34041 : }
34042 :
34043 : /*
34044 : * INITIALIZE LOCAL VARIABLES.
34045 : */
34046 0 : state->brackt = ae_false;
34047 0 : state->stage1 = ae_true;
34048 0 : *nfev = 0;
34049 0 : state->finit = *f;
34050 0 : state->dgtest = logit_ftol*state->dginit;
34051 0 : state->width = logit_stpmax-logit_stpmin;
34052 0 : state->width1 = state->width/p5;
34053 0 : ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
34054 :
34055 : /*
34056 : * THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
34057 : * FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
34058 : * THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
34059 : * FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
34060 : * THE INTERVAL OF UNCERTAINTY.
34061 : * THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
34062 : * FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
34063 : */
34064 0 : state->stx = (double)(0);
34065 0 : state->fx = state->finit;
34066 0 : state->dgx = state->dginit;
34067 0 : state->sty = (double)(0);
34068 0 : state->fy = state->finit;
34069 0 : state->dgy = state->dginit;
34070 :
34071 : /*
34072 : * NEXT
34073 : */
34074 0 : *stage = 3;
34075 0 : continue;
34076 : }
34077 0 : if( *stage==3 )
34078 : {
34079 :
34080 : /*
34081 : * START OF ITERATION.
34082 : *
34083 : * SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
34084 : * TO THE PRESENT INTERVAL OF UNCERTAINTY.
34085 : */
34086 0 : if( state->brackt )
34087 : {
34088 0 : if( ae_fp_less(state->stx,state->sty) )
34089 : {
34090 0 : state->stmin = state->stx;
34091 0 : state->stmax = state->sty;
34092 : }
34093 : else
34094 : {
34095 0 : state->stmin = state->sty;
34096 0 : state->stmax = state->stx;
34097 : }
34098 : }
34099 : else
34100 : {
34101 0 : state->stmin = state->stx;
34102 0 : state->stmax = *stp+state->xtrapf*(*stp-state->stx);
34103 : }
34104 :
34105 : /*
34106 : * FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
34107 : */
34108 0 : if( ae_fp_greater(*stp,logit_stpmax) )
34109 : {
34110 0 : *stp = logit_stpmax;
34111 : }
34112 0 : if( ae_fp_less(*stp,logit_stpmin) )
34113 : {
34114 0 : *stp = logit_stpmin;
34115 : }
34116 :
34117 : /*
34118 : * IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
34119 : * STP BE THE LOWEST POINT OBTAINED SO FAR.
34120 : */
34121 0 : if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=logit_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,logit_xtol*state->stmax)) )
34122 : {
34123 0 : *stp = state->stx;
34124 : }
34125 :
34126 : /*
34127 : * EVALUATE THE FUNCTION AND GRADIENT AT STP
34128 : * AND COMPUTE THE DIRECTIONAL DERIVATIVE.
34129 : */
34130 0 : ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
34131 0 : ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);
34132 :
34133 : /*
34134 : * NEXT
34135 : */
34136 0 : *stage = 4;
34137 0 : return;
34138 : }
34139 0 : if( *stage==4 )
34140 : {
34141 0 : *info = 0;
34142 0 : *nfev = *nfev+1;
34143 0 : v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
34144 0 : state->dg = v;
34145 0 : state->ftest1 = state->finit+*stp*state->dgtest;
34146 :
34147 : /*
34148 : * TEST FOR CONVERGENCE.
34149 : */
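/*
 * Editor's note: the INFO codes assigned by the tests below follow the
 * standard MCSRCH convention:
 *   INFO=6  bracketed step left [STMIN,STMAX] or MCSTEP failed (INFOC=0)
 *   INFO=5  step is pinned at STPMAX and conditions hold there
 *   INFO=4  step is pinned at STPMIN and conditions fail there
 *   INFO=3  evaluation budget MAXFEV exhausted
 *   INFO=2  width of the interval of uncertainty fell below XTOL
 *   INFO=1  sufficient decrease and curvature conditions hold
 *           (successful search)
 */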
34150 0 : if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
34151 : {
34152 0 : *info = 6;
34153 : }
34154 0 : if( (ae_fp_eq(*stp,logit_stpmax)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
34155 : {
34156 0 : *info = 5;
34157 : }
34158 0 : if( ae_fp_eq(*stp,logit_stpmin)&&(ae_fp_greater(*f,state->ftest1)||ae_fp_greater_eq(state->dg,state->dgtest)) )
34159 : {
34160 0 : *info = 4;
34161 : }
34162 0 : if( *nfev>=logit_maxfev )
34163 : {
34164 0 : *info = 3;
34165 : }
34166 0 : if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,logit_xtol*state->stmax) )
34167 : {
34168 0 : *info = 2;
34169 : }
34170 0 : if( ae_fp_less_eq(*f,state->ftest1)&&ae_fp_less_eq(ae_fabs(state->dg, _state),-logit_gtol*state->dginit) )
34171 : {
34172 0 : *info = 1;
34173 : }
34174 :
34175 : /*
34176 : * CHECK FOR TERMINATION.
34177 : */
34178 0 : if( *info!=0 )
34179 : {
34180 0 : *stage = 0;
34181 0 : return;
34182 : }
34183 :
34184 : /*
34185 : * IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
34186 : * FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
34187 : */
34188 0 : if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(logit_ftol, logit_gtol, _state)*state->dginit) )
34189 : {
34190 0 : state->stage1 = ae_false;
34191 : }
34192 :
34193 : /*
34194 : * A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
34195 : * WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
34196 : * FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
34197 : * DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
34198 : * OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
34199 : */
34200 0 : if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
34201 : {
34202 :
34203 : /*
34204 : * DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
34205 : */
34206 0 : state->fm = *f-*stp*state->dgtest;
34207 0 : state->fxm = state->fx-state->stx*state->dgtest;
34208 0 : state->fym = state->fy-state->sty*state->dgtest;
34209 0 : state->dgm = state->dg-state->dgtest;
34210 0 : state->dgxm = state->dgx-state->dgtest;
34211 0 : state->dgym = state->dgy-state->dgtest;
34212 :
34213 : /*
34214 : * CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
34215 : * AND TO COMPUTE THE NEW STEP.
34216 : */
34217 0 : logit_mnlmcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
34218 :
34219 : /*
34220 : * RESET THE FUNCTION AND GRADIENT VALUES FOR F.
34221 : */
34222 0 : state->fx = state->fxm+state->stx*state->dgtest;
34223 0 : state->fy = state->fym+state->sty*state->dgtest;
34224 0 : state->dgx = state->dgxm+state->dgtest;
34225 0 : state->dgy = state->dgym+state->dgtest;
34226 : }
34227 : else
34228 : {
34229 :
34230 : /*
34231 : * CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
34232 : * AND TO COMPUTE THE NEW STEP.
34233 : */
34234 0 : logit_mnlmcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
34235 : }
34236 :
34237 : /*
34238 : * FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
34239 : * INTERVAL OF UNCERTAINTY.
34240 : */
34241 0 : if( state->brackt )
34242 : {
34243 0 : if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
34244 : {
34245 0 : *stp = state->stx+p5*(state->sty-state->stx);
34246 : }
34247 0 : state->width1 = state->width;
34248 0 : state->width = ae_fabs(state->sty-state->stx, _state);
34249 : }
34250 :
34251 : /*
34252 : * NEXT.
34253 : */
34254 0 : *stage = 3;
34255 0 : continue;
34256 : }
34257 : }
34258 : }
34259 :
34260 :
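/*************************************************************************
Internal subroutine: one safeguarded interpolation step (MCSTEP) of the
line search above. On input STX/FX/DX describe the best step so far,
STY/FY/DY the other endpoint of the interval of uncertainty, and STP/FP/DP
the current trial step. The routine picks a cubic and/or quadratic
(secant) step according to the four cases commented below, updates the
interval and the BRACKT flag, and returns a safeguarded new trial step in
STP. INFO reports the case taken (1..4); it stays 0 if the inputs are
inconsistent.
*************************************************************************/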
34261 0 : static void logit_mnlmcstep(double* stx,
34262 : double* fx,
34263 : double* dx,
34264 : double* sty,
34265 : double* fy,
34266 : double* dy,
34267 : double* stp,
34268 : double fp,
34269 : double dp,
34270 : ae_bool* brackt,
34271 : double stmin,
34272 : double stmax,
34273 : ae_int_t* info,
34274 : ae_state *_state)
34275 : {
34276 : ae_bool bound;
34277 : double gamma;
34278 : double p;
34279 : double q;
34280 : double r;
34281 : double s;
34282 : double sgnd;
34283 : double stpc;
34284 : double stpf;
34285 : double stpq;
34286 : double theta;
34287 :
34288 :
34289 0 : *info = 0;
34290 :
34291 : /*
34292 : * CHECK THE INPUT PARAMETERS FOR ERRORS.
34293 : */
34294 0 : if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),(double)(0)))||ae_fp_less(stmax,stmin) )
34295 : {
34296 0 : return;
34297 : }
34298 :
34299 : /*
34300 : * DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
34301 : */
34302 0 : sgnd = dp*(*dx/ae_fabs(*dx, _state));
34303 :
34304 : /*
34305 : * FIRST CASE. A HIGHER FUNCTION VALUE.
34306 : * THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
34307 : * TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
34308 : * ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
34309 : */
34310 0 : if( ae_fp_greater(fp,*fx) )
34311 : {
34312 0 : *info = 1;
34313 0 : bound = ae_true;
34314 0 : theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
34315 0 : s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
34316 0 : gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
34317 0 : if( ae_fp_less(*stp,*stx) )
34318 : {
34319 0 : gamma = -gamma;
34320 : }
34321 0 : p = gamma-(*dx)+theta;
34322 0 : q = gamma-(*dx)+gamma+dp;
34323 0 : r = p/q;
34324 0 : stpc = *stx+r*(*stp-(*stx));
34325 0 : stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
34326 0 : if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
34327 : {
34328 0 : stpf = stpc;
34329 : }
34330 : else
34331 : {
34332 0 : stpf = stpc+(stpq-stpc)/2;
34333 : }
34334 0 : *brackt = ae_true;
34335 : }
34336 : else
34337 : {
34338 0 : if( ae_fp_less(sgnd,(double)(0)) )
34339 : {
34340 :
34341 : /*
34342 : * SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
34343 : * OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
34344 : * STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
34345 : * THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
34346 : */
34347 0 : *info = 2;
34348 0 : bound = ae_false;
34349 0 : theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
34350 0 : s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
34351 0 : gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
34352 0 : if( ae_fp_greater(*stp,*stx) )
34353 : {
34354 0 : gamma = -gamma;
34355 : }
34356 0 : p = gamma-dp+theta;
34357 0 : q = gamma-dp+gamma+(*dx);
34358 0 : r = p/q;
34359 0 : stpc = *stp+r*(*stx-(*stp));
34360 0 : stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
34361 0 : if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
34362 : {
34363 0 : stpf = stpc;
34364 : }
34365 : else
34366 : {
34367 0 : stpf = stpq;
34368 : }
34369 0 : *brackt = ae_true;
34370 : }
34371 : else
34372 : {
34373 0 : if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
34374 : {
34375 :
34376 : /*
34377 : * THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
34378 : * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
34379 : * THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
34380 : * IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
34381 : * IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
34382 : * EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
34383 : * COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE STEP
34384 : * CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
34385 : */
34386 0 : *info = 3;
34387 0 : bound = ae_true;
34388 0 : theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
34389 0 : s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
34390 :
34391 : /*
34392 : * THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
34393 : * TO INFINITY IN THE DIRECTION OF THE STEP.
34394 : */
34395 0 : gamma = s*ae_sqrt(ae_maxreal((double)(0), ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
34396 0 : if( ae_fp_greater(*stp,*stx) )
34397 : {
34398 0 : gamma = -gamma;
34399 : }
34400 0 : p = gamma-dp+theta;
34401 0 : q = gamma+(*dx-dp)+gamma;
34402 0 : r = p/q;
34403 0 : if( ae_fp_less(r,(double)(0))&&ae_fp_neq(gamma,(double)(0)) )
34404 : {
34405 0 : stpc = *stp+r*(*stx-(*stp));
34406 : }
34407 : else
34408 : {
34409 0 : if( ae_fp_greater(*stp,*stx) )
34410 : {
34411 0 : stpc = stmax;
34412 : }
34413 : else
34414 : {
34415 0 : stpc = stmin;
34416 : }
34417 : }
34418 0 : stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
34419 0 : if( *brackt )
34420 : {
34421 0 : if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
34422 : {
34423 0 : stpf = stpc;
34424 : }
34425 : else
34426 : {
34427 0 : stpf = stpq;
34428 : }
34429 : }
34430 : else
34431 : {
34432 0 : if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
34433 : {
34434 0 : stpf = stpc;
34435 : }
34436 : else
34437 : {
34438 0 : stpf = stpq;
34439 : }
34440 : }
34441 : }
34442 : else
34443 : {
34444 :
34445 : /*
34446 : * FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
34447 : * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
34448 : * NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
34449 : * IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
34450 : */
34451 0 : *info = 4;
34452 0 : bound = ae_false;
34453 0 : if( *brackt )
34454 : {
34455 0 : theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
34456 0 : s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
34457 0 : gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
34458 0 : if( ae_fp_greater(*stp,*sty) )
34459 : {
34460 0 : gamma = -gamma;
34461 : }
34462 0 : p = gamma-dp+theta;
34463 0 : q = gamma-dp+gamma+(*dy);
34464 0 : r = p/q;
34465 0 : stpc = *stp+r*(*sty-(*stp));
34466 0 : stpf = stpc;
34467 : }
34468 : else
34469 : {
34470 0 : if( ae_fp_greater(*stp,*stx) )
34471 : {
34472 0 : stpf = stmax;
34473 : }
34474 : else
34475 : {
34476 0 : stpf = stmin;
34477 : }
34478 : }
34479 : }
34480 : }
34481 : }
34482 :
34483 : /*
34484 : * UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
34485 : * DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
34486 : */
34487 0 : if( ae_fp_greater(fp,*fx) )
34488 : {
34489 0 : *sty = *stp;
34490 0 : *fy = fp;
34491 0 : *dy = dp;
34492 : }
34493 : else
34494 : {
34495 0 : if( ae_fp_less(sgnd,0.0) )
34496 : {
34497 0 : *sty = *stx;
34498 0 : *fy = *fx;
34499 0 : *dy = *dx;
34500 : }
34501 0 : *stx = *stp;
34502 0 : *fx = fp;
34503 0 : *dx = dp;
34504 : }
34505 :
34506 : /*
34507 : * COMPUTE THE NEW STEP AND SAFEGUARD IT.
34508 : */
34509 0 : stpf = ae_minreal(stmax, stpf, _state);
34510 0 : stpf = ae_maxreal(stmin, stpf, _state);
34511 0 : *stp = stpf;
34512 0 : if( *brackt&&bound )
34513 : {
34514 0 : if( ae_fp_greater(*sty,*stx) )
34515 : {
34516 0 : *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
34517 : }
34518 : else
34519 : {
34520 0 : *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
34521 : }
34522 : }
34523 : }
34524 :
34525 :
34526 0 : void _logitmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
34527 : {
34528 0 : logitmodel *p = (logitmodel*)_p;
34529 0 : ae_touch_ptr((void*)p);
34530 0 : ae_vector_init(&p->w, 0, DT_REAL, _state, make_automatic);
34531 0 : }
34532 :
34533 :
34534 0 : void _logitmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
34535 : {
34536 0 : logitmodel *dst = (logitmodel*)_dst;
34537 0 : logitmodel *src = (logitmodel*)_src;
34538 0 : ae_vector_init_copy(&dst->w, &src->w, _state, make_automatic);
34539 0 : }
34540 :
34541 :
34542 0 : void _logitmodel_clear(void* _p)
34543 : {
34544 0 : logitmodel *p = (logitmodel*)_p;
34545 0 : ae_touch_ptr((void*)p);
34546 0 : ae_vector_clear(&p->w);
34547 0 : }
34548 :
34549 :
34550 0 : void _logitmodel_destroy(void* _p)
34551 : {
34552 0 : logitmodel *p = (logitmodel*)_p;
34553 0 : ae_touch_ptr((void*)p);
34554 0 : ae_vector_destroy(&p->w);
34555 0 : }
34556 :
34557 :
34558 0 : void _logitmcstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
34559 : {
34560 0 : logitmcstate *p = (logitmcstate*)_p;
34561 0 : ae_touch_ptr((void*)p);
34562 0 : }
34563 :
34564 :
34565 0 : void _logitmcstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
34566 : {
34567 0 : logitmcstate *dst = (logitmcstate*)_dst;
34568 0 : logitmcstate *src = (logitmcstate*)_src;
34569 0 : dst->brackt = src->brackt;
34570 0 : dst->stage1 = src->stage1;
34571 0 : dst->infoc = src->infoc;
34572 0 : dst->dg = src->dg;
34573 0 : dst->dgm = src->dgm;
34574 0 : dst->dginit = src->dginit;
34575 0 : dst->dgtest = src->dgtest;
34576 0 : dst->dgx = src->dgx;
34577 0 : dst->dgxm = src->dgxm;
34578 0 : dst->dgy = src->dgy;
34579 0 : dst->dgym = src->dgym;
34580 0 : dst->finit = src->finit;
34581 0 : dst->ftest1 = src->ftest1;
34582 0 : dst->fm = src->fm;
34583 0 : dst->fx = src->fx;
34584 0 : dst->fxm = src->fxm;
34585 0 : dst->fy = src->fy;
34586 0 : dst->fym = src->fym;
34587 0 : dst->stx = src->stx;
34588 0 : dst->sty = src->sty;
34589 0 : dst->stmin = src->stmin;
34590 0 : dst->stmax = src->stmax;
34591 0 : dst->width = src->width;
34592 0 : dst->width1 = src->width1;
34593 0 : dst->xtrapf = src->xtrapf;
34594 0 : }
34595 :
34596 :
34597 0 : void _logitmcstate_clear(void* _p)
34598 : {
34599 0 : logitmcstate *p = (logitmcstate*)_p;
34600 0 : ae_touch_ptr((void*)p);
34601 0 : }
34602 :
34603 :
34604 0 : void _logitmcstate_destroy(void* _p)
34605 : {
34606 0 : logitmcstate *p = (logitmcstate*)_p;
34607 0 : ae_touch_ptr((void*)p);
34608 0 : }
34609 :
34610 :
34611 0 : void _mnlreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
34612 : {
34613 0 : mnlreport *p = (mnlreport*)_p;
34614 0 : ae_touch_ptr((void*)p);
34615 0 : }
34616 :
34617 :
34618 0 : void _mnlreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
34619 : {
34620 0 : mnlreport *dst = (mnlreport*)_dst;
34621 0 : mnlreport *src = (mnlreport*)_src;
34622 0 : dst->ngrad = src->ngrad;
34623 0 : dst->nhess = src->nhess;
34624 0 : }
34625 :
34626 :
34627 0 : void _mnlreport_clear(void* _p)
34628 : {
34629 0 : mnlreport *p = (mnlreport*)_p;
34630 0 : ae_touch_ptr((void*)p);
34631 0 : }
34632 :
34633 :
34634 0 : void _mnlreport_destroy(void* _p)
34635 : {
34636 0 : mnlreport *p = (mnlreport*)_p;
34637 0 : ae_touch_ptr((void*)p);
34638 0 : }
34639 :
34640 :
34641 : #endif
34642 : #if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
34643 :
34644 :
34645 : /*************************************************************************
34646 : DESCRIPTION:
34647 :
34648 : This function creates MCPD (Markov Chains for Population Data) solver.
34649 :
34650 : This solver can be used to find transition matrix P for N-dimensional
34651 : prediction problem where transition from X[i] to X[i+1] is modelled as
34652 : X[i+1] = P*X[i]
34653 : where X[i] and X[i+1] are N-dimensional population vectors (components of
34654 : each X are non-negative), and P is an N*N transition matrix (elements of P
34655 : are non-negative, each column sums to 1.0).
34656 :
34657 : Such models arise when:
34658 : * there is some population of individuals
34659 : * individuals can have different states
34660 : * individuals can transit from one state to another
34661 : * population size is constant, i.e. there are no new individuals and no one
34662 : leaves the population
34663 : * you want to model transitions of individuals from one state into another
34664 :
34665 : USAGE:
34666 :
34667 : Here we give a very brief outline of the MCPD. We strongly recommend that
34668 : you read the examples in the ALGLIB Reference Manual and the ALGLIB User
34669 : Guide on data analysis, which is available at http://www.alglib.net/dataanalysis/
34670 :
34671 : 1. User initializes algorithm state with MCPDCreate() call
34672 :
34673 : 2. User adds one or more tracks - sequences of states which describe
34674 : evolution of a system being modelled from different starting conditions
34675 :
34676 : 3. User may add optional boundary, equality and/or linear constraints on
34677 : the coefficients of P by calling one of the following functions:
34678 : * MCPDSetEC() to set equality constraints
34679 : * MCPDSetBC() to set bound constraints
34680 : * MCPDSetLC() to set linear constraints
34681 :
34682 : 4. Optionally, user may set custom weights for prediction errors (by
34683 : default, the algorithm assigns non-equal, automatically chosen weights to
34684 : errors in the prediction of different components of X). This is done by
34685 : calling the MCPDSetPredictionWeights() function.
34686 :
34687 : 5. User calls MCPDSolve() function, which takes the algorithm state and
34688 : solves the problem internally; no user-supplied F/G callback is needed.
34689 :
34690 : 6. User calls MCPDResults() to get solution
34691 :
34692 : INPUT PARAMETERS:
34693 : N - problem dimension, N>=1
34694 :
34695 : OUTPUT PARAMETERS:
34696 : State - structure stores algorithm state
34697 :
34698 : -- ALGLIB --
34699 : Copyright 23.05.2010 by Bochkanov Sergey
34700 : *************************************************************************/
34701 0 : void mcpdcreate(ae_int_t n, mcpdstate* s, ae_state *_state)
34702 : {
34703 :
34704 0 : _mcpdstate_clear(s);
34705 :
34706 0 : ae_assert(n>=1, "MCPDCreate: N<1", _state);
34707 0 : mcpd_mcpdinit(n, -1, -1, s, _state);
34708 0 : }
34709 :
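/*
 * Editor's note: a minimal usage sketch, not part of the library. It
 * follows the six USAGE steps above via the C++ wrappers from the alglib
 * namespace; the 2-state track values are invented for illustration and
 * the fragment is disabled with #if 0.
 */
#if 0
static void mcpd_usage_sketch()
{
    alglib::mcpdstate s;
    alglib::mcpdreport rep;
    alglib::real_2d_array track, p;

    /* three snapshots of a 2-state population, given as proportions */
    track.setlength(3, 2);
    track[0][0] = 0.90; track[0][1] = 0.10;
    track[1][0] = 0.82; track[1][1] = 0.18;
    track[2][0] = 0.76; track[2][1] = 0.24;

    alglib::mcpdcreate(2, s);          /* step 1: create solver        */
    alglib::mcpdaddtrack(s, track);    /* step 2: add track(s)         */
    alglib::mcpdsolve(s);              /* step 5: solve, no callbacks  */
    alglib::mcpdresults(s, p, rep);    /* step 6: fetch P and report   */
}
#endif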
34710 :
34711 : /*************************************************************************
34712 : DESCRIPTION:
34713 :
34714 : This function is a specialized version of the MCPDCreate() function; we
34715 : recommend that you read its comments for general information about the
34716 : MCPD solver.
34717 :
34718 : This function creates MCPD (Markov Chains for Population Data) solver
34719 : for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
34720 : is modelled as
34721 : X[i+1] = P*X[i]
34722 : where
34723 : X[i] and X[i+1] are N-dimensional state vectors
34724 : P is an N*N transition matrix
34725 : and one selected component of X[] is called "entry" state and is treated
34726 : in a special way:
34727 : system state always transits from "entry" state to some other state
34728 : system state can not transit from any state into "entry" state
34729 : Such conditions basically mean that row of P which corresponds to "entry"
34730 : state is zero.
34731 :
34732 : Such models arise when:
34733 : * there is some population of individuals
34734 : * individuals can have different states
34735 : * individuals can transit from one state to another
34736 : * population size is NOT constant - at every moment of time there is some
34737 : (unpredictable) number of "new" individuals, which can transit into one
34738 : of the states at the next turn, but still no one leaves the population
34739 : * you want to model transitions of individuals from one state into another
34740 : * but you do NOT want to predict the number of "new" individuals because it
34741 : does not depend on the individuals already present (hence the system can not
34742 : transit INTO the entry state - it can only transit FROM it).
34743 :
34744 : This model is discussed in more detail in the ALGLIB User Guide (see
34745 : http://www.alglib.net/dataanalysis/ for more information).
34746 :
34747 : INPUT PARAMETERS:
34748 : N - problem dimension, N>=2
34749 : EntryState- index of entry state, in 0..N-1
34750 :
34751 : OUTPUT PARAMETERS:
34752 : State - structure stores algorithm state
34753 :
34754 : -- ALGLIB --
34755 : Copyright 23.05.2010 by Bochkanov Sergey
34756 : *************************************************************************/
34757 0 : void mcpdcreateentry(ae_int_t n,
34758 : ae_int_t entrystate,
34759 : mcpdstate* s,
34760 : ae_state *_state)
34761 : {
34762 :
34763 0 : _mcpdstate_clear(s);
34764 :
34765 0 : ae_assert(n>=2, "MCPDCreateEntry: N<2", _state);
34766 0 : ae_assert(entrystate>=0, "MCPDCreateEntry: EntryState<0", _state);
34767 0 : ae_assert(entrystate<n, "MCPDCreateEntry: EntryState>=N", _state);
34768 0 : mcpd_mcpdinit(n, entrystate, -1, s, _state);
34769 0 : }
34770 :
34771 :
34772 : /*************************************************************************
34773 : DESCRIPTION:
34774 :
34775 : This function is a specialized version of the MCPDCreate() function; we
34776 : recommend that you read its comments for general information about the
34777 : MCPD solver.
34778 :
34779 : This function creates MCPD (Markov Chains for Population Data) solver
34780 : for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
34781 : is modelled as
34782 : X[i+1] = P*X[i]
34783 : where
34784 : X[i] and X[i+1] are N-dimensional state vectors
34785 : P is an N*N transition matrix
34786 : and one selected component of X[] is called "exit" state and is treated
34787 : in a special way:
34788 : system state can transit from any state into "exit" state
34789 : system state can not transit from "exit" state into any other state
34790 : transition operator discards "exit" state (makes it zero at each turn)
34791 : Such conditions basically mean that column of P which corresponds to
34792 : "exit" state is zero. Multiplication by such P may decrease sum of vector
34793 : components.
34794 :
34795 : Such models arise when:
34796 : * there is some population of individuals
34797 : * individuals can have different states
34798 : * individuals can transit from one state to another
34799 : * population size is NOT constant - individuals can move into "exit" state
34800 : and leave the population at the next turn, but there are no new individuals
34801 : * the number of individuals which leave the population can be predicted
34802 : * you want to model transitions of individuals from one state into another
34803 : (including transitions into the "exit" state)
34804 :
34805 : This model is discussed in more detail in the ALGLIB User Guide (see
34806 : http://www.alglib.net/dataanalysis/ for more information).
34807 :
34808 : INPUT PARAMETERS:
34809 : N - problem dimension, N>=2
34810 : ExitState- index of exit state, in 0..N-1
34811 :
34812 : OUTPUT PARAMETERS:
34813 : State - structure stores algorithm state
34814 :
34815 : -- ALGLIB --
34816 : Copyright 23.05.2010 by Bochkanov Sergey
34817 : *************************************************************************/
34818 0 : void mcpdcreateexit(ae_int_t n,
34819 : ae_int_t exitstate,
34820 : mcpdstate* s,
34821 : ae_state *_state)
34822 : {
34823 :
34824 0 : _mcpdstate_clear(s);
34825 :
34826 0 : ae_assert(n>=2, "MCPDCreateExit: N<2", _state);
34827 0 : ae_assert(exitstate>=0, "MCPDCreateExit: ExitState<0", _state);
34828 0 : ae_assert(exitstate<n, "MCPDCreateExit: ExitState>=N", _state);
34829 0 : mcpd_mcpdinit(n, -1, exitstate, s, _state);
34830 0 : }
34831 :
34832 :
34833 : /*************************************************************************
34834 : DESCRIPTION:
34835 :
34836 : This function is a specialized version of the MCPDCreate() function; we
34837 : recommend that you read its comments for general information about the
34838 : MCPD solver.
34839 :
34840 : This function creates MCPD (Markov Chains for Population Data) solver
34841 : for "Entry-Exit-states" model, i.e. model where transition from X[i] to
34842 : X[i+1] is modelled as
34843 : X[i+1] = P*X[i]
34844 : where
34845 : X[i] and X[i+1] are N-dimensional state vectors
34846 : P is an N*N transition matrix
34847 : one selected component of X[] is called "entry" state and is treated in a
34848 : special way:
34849 : system state always transits from "entry" state to some other state
34850 : system state can not transit from any state into "entry" state
34851 : and another one component of X[] is called "exit" state and is treated in
34852 : a special way too:
34853 : system state can transit from any state into "exit" state
34854 : system state can not transit from "exit" state into any other state
34855 : transition operator discards "exit" state (makes it zero at each turn)
34856 : Such conditions basically mean that:
34857 : row of P which corresponds to "entry" state is zero
34858 : column of P which corresponds to "exit" state is zero
34859 : Multiplication by such P may decrease sum of vector components.
34860 :
34861 : Such models arise when:
34862 : * there is some population of individuals
34863 : * individuals can have different states
34864 : * individuals can transit from one state to another
34865 : * population size is NOT constant
34866 : * at every moment of time there is some (unpredictable) number of "new"
34867 : individuals, which can transit into one of the states at the next turn
34868 : * some individuals can move (predictably) into "exit" state and leave
34869 : population at the next turn
34870 : * you want to model transitions of individuals from one state into another,
34871 : including transitions from the "entry" state and into the "exit" state.
34872 : * but you do NOT want to predict the number of "new" individuals because it
34873 : does not depend on the individuals already present (hence the system can not
34874 : transit INTO the entry state - it can only transit FROM it).
34875 :
34876 : This model is discussed in more detail in the ALGLIB User Guide (see
34877 : http://www.alglib.net/dataanalysis/ for more information).
34878 :
34879 : INPUT PARAMETERS:
34880 : N - problem dimension, N>=2
34881 : EntryState- index of entry state, in 0..N-1
34882 : ExitState- index of exit state, in 0..N-1
34883 :
34884 : OUTPUT PARAMETERS:
34885 : State - structure stores algorithm state
34886 :
34887 : -- ALGLIB --
34888 : Copyright 23.05.2010 by Bochkanov Sergey
34889 : *************************************************************************/
34890 0 : void mcpdcreateentryexit(ae_int_t n,
34891 : ae_int_t entrystate,
34892 : ae_int_t exitstate,
34893 : mcpdstate* s,
34894 : ae_state *_state)
34895 : {
34896 :
34897 0 : _mcpdstate_clear(s);
34898 :
34899 0 : ae_assert(n>=2, "MCPDCreateEntryExit: N<2", _state);
34900 0 : ae_assert(entrystate>=0, "MCPDCreateEntryExit: EntryState<0", _state);
34901 0 : ae_assert(entrystate<n, "MCPDCreateEntryExit: EntryState>=N", _state);
34902 0 : ae_assert(exitstate>=0, "MCPDCreateEntryExit: ExitState<0", _state);
34903 0 : ae_assert(exitstate<n, "MCPDCreateEntryExit: ExitState>=N", _state);
34904 0 : ae_assert(entrystate!=exitstate, "MCPDCreateEntryExit: EntryState=ExitState", _state);
34905 0 : mcpd_mcpdinit(n, entrystate, exitstate, s, _state);
34906 0 : }
34907 :
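/*
 * Editor's illustration: for N=3, EntryState=0, ExitState=2 the
 * conditions above force the following sparsity pattern on P
 * (P[destination,source]; free non-negative entries shown as *):
 *
 *         ( 0  0  0 )   <- entry row is zero: nothing transits INTO state 0
 *     P = ( *  *  0 )
 *         ( *  *  0 )   <- exit column is zero: nothing transits FROM state 2
 */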
34908 :
34909 : /*************************************************************************
34910 : This function is used to add a track - sequence of system states at the
34911 : different moments of its evolution.
34912 :
34913 : You may add one or several tracks to the MCPD solver. In case you have
34914 : several tracks, they won't overwrite each other. For example, if you pass
34915 : two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
34916 : the solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to t=A+3,
34917 : t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks - i.e. it
34918 : won't try to model a transition from t=A+3 to t=B+1.
34919 :
34920 : INPUT PARAMETERS:
34921 : S - solver
34922 : XY - track, array[K,N]:
34923 : * I-th row is a state at t=I
34924 : * elements of XY must be non-negative (exception will be
34925 : thrown on negative elements)
34926 : K - number of points in a track
34927 : * if given, only leading K rows of XY are used
34928 : * if not given, automatically determined from size of XY
34929 :
34930 : NOTES:
34931 :
34932 : 1. Track may contain either proportional or population data:
34933 : * with proportional data all rows of XY must sum to 1.0, i.e. we have
34934 : proportions instead of absolute population values
34935 : * with population data rows of XY contain population counts and generally
34936 : do not sum to 1.0 (although they still must be non-negative)
34937 :
34938 : -- ALGLIB --
34939 : Copyright 23.05.2010 by Bochkanov Sergey
34940 : *************************************************************************/
34941 0 : void mcpdaddtrack(mcpdstate* s,
34942 : /* Real */ ae_matrix* xy,
34943 : ae_int_t k,
34944 : ae_state *_state)
34945 : {
34946 : ae_int_t i;
34947 : ae_int_t j;
34948 : ae_int_t n;
34949 : double s0;
34950 : double s1;
34951 :
34952 :
34953 0 : n = s->n;
34954 0 : ae_assert(k>=0, "MCPDAddTrack: K<0", _state);
34955 0 : ae_assert(xy->cols>=n, "MCPDAddTrack: Cols(XY)<N", _state);
34956 0 : ae_assert(xy->rows>=k, "MCPDAddTrack: Rows(XY)<K", _state);
34957 0 : ae_assert(apservisfinitematrix(xy, k, n, _state), "MCPDAddTrack: XY contains infinite or NaN elements", _state);
34958 0 : for(i=0; i<=k-1; i++)
34959 : {
34960 0 : for(j=0; j<=n-1; j++)
34961 : {
34962 0 : ae_assert(ae_fp_greater_eq(xy->ptr.pp_double[i][j],(double)(0)), "MCPDAddTrack: XY contains negative elements", _state);
34963 : }
34964 : }
34965 0 : if( k<2 )
34966 : {
34967 0 : return;
34968 : }
34969 0 : if( s->data.rows<s->npairs+k-1 )
34970 : {
34971 0 : rmatrixresize(&s->data, ae_maxint(2*s->data.rows, s->npairs+k-1, _state), 2*n, _state);
34972 : }
34973 0 : for(i=0; i<=k-2; i++)
34974 : {
34975 0 : s0 = (double)(0);
34976 0 : s1 = (double)(0);
34977 0 : for(j=0; j<=n-1; j++)
34978 : {
34979 0 : if( s->states.ptr.p_int[j]>=0 )
34980 : {
34981 0 : s0 = s0+xy->ptr.pp_double[i][j];
34982 : }
34983 0 : if( s->states.ptr.p_int[j]<=0 )
34984 : {
34985 0 : s1 = s1+xy->ptr.pp_double[i+1][j];
34986 : }
34987 : }
34988 0 : if( ae_fp_greater(s0,(double)(0))&&ae_fp_greater(s1,(double)(0)) )
34989 : {
34990 0 : for(j=0; j<=n-1; j++)
34991 : {
34992 0 : if( s->states.ptr.p_int[j]>=0 )
34993 : {
34994 0 : s->data.ptr.pp_double[s->npairs][j] = xy->ptr.pp_double[i][j]/s0;
34995 : }
34996 : else
34997 : {
34998 0 : s->data.ptr.pp_double[s->npairs][j] = 0.0;
34999 : }
35000 0 : if( s->states.ptr.p_int[j]<=0 )
35001 : {
35002 0 : s->data.ptr.pp_double[s->npairs][n+j] = xy->ptr.pp_double[i+1][j]/s1;
35003 : }
35004 : else
35005 : {
35006 0 : s->data.ptr.pp_double[s->npairs][n+j] = 0.0;
35007 : }
35008 : }
35009 0 : s->npairs = s->npairs+1;
35010 : }
35011 : }
35012 : }
35013 :
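/*
 * Editor's note on the implementation above: MCPDAddTrack() does not keep
 * the raw track. It stores K-1 consecutive (x,y) pairs in S.Data, with
 * the usable components of each pair renormalized to sum to 1.0 (exit
 * states are zeroed on the source side, entry states on the destination
 * side). Tracks with K<2 therefore contribute nothing, and pairs whose
 * usable components sum to zero are skipped.
 */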
35014 :
35015 : /*************************************************************************
35016 : This function is used to set equality constraints on the elements of the
35017 : transition matrix P.
35018 :
35019 : MCPD solver has four types of constraints which can be placed on P:
35020 : * user-specified equality constraints (optional)
35021 : * user-specified bound constraints (optional)
35022 : * user-specified general linear constraints (optional)
35023 : * basic constraints (always present):
35024 : * non-negativity: P[i,j]>=0
35025 : * consistency: every column of P sums to 1.0
35026 :
35027 : Final constraints which are passed to the underlying optimizer are
35028 : calculated as intersection of all present constraints. For example, you
35029 : may specify boundary constraint on P[0,0] and equality one:
35030 : 0.1<=P[0,0]<=0.9
35031 : P[0,0]=0.5
35032 : Such combination of constraints will be silently reduced to their
35033 : intersection, which is P[0,0]=0.5.
35034 :
35035 : This function can be used to place equality constraints on arbitrary
35036 : subset of elements of P. Set of constraints is specified by EC, which may
35037 : contain either NANs or finite numbers from [0,1]. NAN denotes absence of a
35038 : constraint, while a finite number denotes an equality constraint on the
35039 : corresponding element of P.
35040 :
35041 : You can also use the MCPDAddEC() function, which allows you to ADD an
35042 : equality constraint for one element of P without changing constraints for
35043 : other elements.
35044 :
35045 : These functions (MCPDSetEC and MCPDAddEC) interact as follows:
35046 : * there is internal matrix of equality constraints which is stored in the
35047 : MCPD solver
35048 : * MCPDSetEC() replaces this matrix by another one (SET)
35049 : * MCPDAddEC() modifies one element of this matrix and leaves other ones
35050 : unchanged (ADD)
35051 : * thus MCPDAddEC() call preserves all modifications done by previous
35052 : calls, while MCPDSetEC() completely discards all changes done to the
35053 : equality constraints.
35054 :
35055 : INPUT PARAMETERS:
35056 : S - solver
35057 : EC - equality constraints, array[N,N]. Elements of EC can be
35058 : either NAN's or finite numbers from [0,1]. NAN denotes
35059 : absence of constraints, while finite value denotes
35060 : equality constraint on the corresponding element of P.
35061 :
35062 : NOTES:
35063 :
35064 : 1. infinite values of EC will lead to an exception being thrown. Values less
35065 : than 0.0 or greater than 1.0 will lead to an error code being returned after
35066 : the call to MCPDSolve().
35067 :
35068 : -- ALGLIB --
35069 : Copyright 23.05.2010 by Bochkanov Sergey
35070 : *************************************************************************/
35071 0 : void mcpdsetec(mcpdstate* s,
35072 : /* Real */ ae_matrix* ec,
35073 : ae_state *_state)
35074 : {
35075 : ae_int_t i;
35076 : ae_int_t j;
35077 : ae_int_t n;
35078 :
35079 :
35080 0 : n = s->n;
35081 0 : ae_assert(ec->cols>=n, "MCPDSetEC: Cols(EC)<N", _state);
35082 0 : ae_assert(ec->rows>=n, "MCPDSetEC: Rows(EC)<N", _state);
35083 0 : for(i=0; i<=n-1; i++)
35084 : {
35085 0 : for(j=0; j<=n-1; j++)
35086 : {
35087 0 : ae_assert(ae_isfinite(ec->ptr.pp_double[i][j], _state)||ae_isnan(ec->ptr.pp_double[i][j], _state), "MCPDSetEC: EC contains infinite elements", _state);
35088 0 : s->ec.ptr.pp_double[i][j] = ec->ptr.pp_double[i][j];
35089 : }
35090 : }
35091 0 : }
35092 :
35093 :
35094 : /*************************************************************************
35095 : This function is used to add equality constraints on the elements of the
35096 : transition matrix P.
35097 :
35098 : MCPD solver has four types of constraints which can be placed on P:
35099 : * user-specified equality constraints (optional)
35100 : * user-specified bound constraints (optional)
35101 : * user-specified general linear constraints (optional)
35102 : * basic constraints (always present):
35103 : * non-negativity: P[i,j]>=0
35104 : * consistency: every column of P sums to 1.0
35105 :
35106 : Final constraints which are passed to the underlying optimizer are
35107 : calculated as intersection of all present constraints. For example, you
35108 : may specify boundary constraint on P[0,0] and equality one:
35109 : 0.1<=P[0,0]<=0.9
35110 : P[0,0]=0.5
35111 : Such combination of constraints will be silently reduced to their
35112 : intersection, which is P[0,0]=0.5.
35113 :
35114 : This function can be used to ADD an equality constraint for one element of
35115 : P without changing constraints for other elements.
35116 :
35117 : You can also use the MCPDSetEC() function, which allows you to specify an
35118 : arbitrary set of equality constraints in one call.
35119 :
35120 : These functions (MCPDSetEC and MCPDAddEC) interact as follows:
35121 : * there is internal matrix of equality constraints which is stored in the
35122 : MCPD solver
35123 : * MCPDSetEC() replaces this matrix by another one (SET)
35124 : * MCPDAddEC() modifies one element of this matrix and leaves other ones
35125 : unchanged (ADD)
35126 : * thus MCPDAddEC() call preserves all modifications done by previous
35127 : calls, while MCPDSetEC() completely discards all changes done to the
35128 : equality constraints.
35129 :
35130 : INPUT PARAMETERS:
35131 : S - solver
35132 : I - row index of element being constrained
35133 : J - column index of element being constrained
35134 : C - value (constraint for P[I,J]). Can be either NAN (no
35135 : constraint) or finite value from [0,1].
35136 :
35137 : NOTES:
35138 :
35139 : 1. infinite values of C will lead to an exception being thrown. Values less
35140 : than 0.0 or greater than 1.0 will lead to an error code being returned after
35141 : the call to MCPDSolve().
35142 :
35143 : -- ALGLIB --
35144 : Copyright 23.05.2010 by Bochkanov Sergey
35145 : *************************************************************************/
35146 0 : void mcpdaddec(mcpdstate* s,
35147 : ae_int_t i,
35148 : ae_int_t j,
35149 : double c,
35150 : ae_state *_state)
35151 : {
35152 :
35153 :
35154 0 : ae_assert(i>=0, "MCPDAddEC: I<0", _state);
35155 0 : ae_assert(i<s->n, "MCPDAddEC: I>=N", _state);
35156 0 : ae_assert(j>=0, "MCPDAddEC: J<0", _state);
35157 0 : ae_assert(j<s->n, "MCPDAddEC: J>=N", _state);
35158 0 : ae_assert(ae_isnan(c, _state)||ae_isfinite(c, _state), "MCPDAddEC: C is neither a finite number nor NAN", _state);
35159 0 : s->ec.ptr.pp_double[i][j] = c;
35160 0 : }
35161 :
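/*
 * Editor's note: an illustrative fragment (values invented, disabled with
 * #if 0; s is an initialized mcpdstate). fp_nan is the NAN constant from
 * the alglib namespace; assigning it removes a previously set constraint.
 */
#if 0
alglib::mcpdaddec(s, 0, 0, 0.5);             /* pin P[0,0] to 0.5         */
alglib::mcpdaddec(s, 0, 1, alglib::fp_nan);  /* drop constraint on P[0,1] */
#endif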
35162 :
35163 : /*************************************************************************
35164 : This function is used to set bound constraints on the elements of the
35165 : transition matrix P.
35166 :
35167 : MCPD solver has four types of constraints which can be placed on P:
35168 : * user-specified equality constraints (optional)
35169 : * user-specified bound constraints (optional)
35170 : * user-specified general linear constraints (optional)
35171 : * basic constraints (always present):
35172 : * non-negativity: P[i,j]>=0
35173 : * consistency: every column of P sums to 1.0
35174 :
35175 : Final constraints which are passed to the underlying optimizer are
35176 : calculated as intersection of all present constraints. For example, you
35177 : may specify boundary constraint on P[0,0] and equality one:
35178 : 0.1<=P[0,0]<=0.9
35179 : P[0,0]=0.5
35180 : Such combination of constraints will be silently reduced to their
35181 : intersection, which is P[0,0]=0.5.
35182 :
35183 : This function can be used to place bound constraints on arbitrary
35184 : subset of elements of P. Set of constraints is specified by BndL/BndU
35185 : matrices, which may contain arbitrary combination of finite numbers or
35186 : infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
35187 :
35188 : You can also use the MCPDAddBC() function, which allows you to ADD a bound
35189 : constraint for one element of P without changing constraints for other elements.
35190 :
35191 : These functions (MCPDSetBC and MCPDAddBC) interact as follows:
35192 : * there is internal matrix of bound constraints which is stored in the
35193 : MCPD solver
35194 : * MCPDSetBC() replaces this matrix by another one (SET)
35195 : * MCPDAddBC() modifies one element of this matrix and leaves other ones
35196 : unchanged (ADD)
35197 : * thus MCPDAddBC() call preserves all modifications done by previous
35198 : calls, while MCPDSetBC() completely discards all changes done to the
35199 : bound constraints.
35200 :
35201 : INPUT PARAMETERS:
35202 : S - solver
35203 : BndL - lower bounds constraints, array[N,N]. Elements of BndL can
35204 : be finite numbers or -INF.
35205 : BndU - upper bounds constraints, array[N,N]. Elements of BndU can
35206 : be finite numbers or +INF.
35207 :
35208 : -- ALGLIB --
35209 : Copyright 23.05.2010 by Bochkanov Sergey
35210 : *************************************************************************/
35211 0 : void mcpdsetbc(mcpdstate* s,
35212 : /* Real */ ae_matrix* bndl,
35213 : /* Real */ ae_matrix* bndu,
35214 : ae_state *_state)
35215 : {
35216 : ae_int_t i;
35217 : ae_int_t j;
35218 : ae_int_t n;
35219 :
35220 :
35221 0 : n = s->n;
35222 0 : ae_assert(bndl->cols>=n, "MCPDSetBC: Cols(BndL)<N", _state);
35223 0 : ae_assert(bndl->rows>=n, "MCPDSetBC: Rows(BndL)<N", _state);
35224 0 : ae_assert(bndu->cols>=n, "MCPDSetBC: Cols(BndU)<N", _state);
35225 0 : ae_assert(bndu->rows>=n, "MCPDSetBC: Rows(BndU)<N", _state);
35226 0 : for(i=0; i<=n-1; i++)
35227 : {
35228 0 : for(j=0; j<=n-1; j++)
35229 : {
35230 0 : ae_assert(ae_isfinite(bndl->ptr.pp_double[i][j], _state)||ae_isneginf(bndl->ptr.pp_double[i][j], _state), "MCPDSetBC: BndL contains NAN or +INF", _state);
35231 0 : ae_assert(ae_isfinite(bndu->ptr.pp_double[i][j], _state)||ae_isposinf(bndu->ptr.pp_double[i][j], _state), "MCPDSetBC: BndU contains NAN or -INF", _state);
35232 0 : s->bndl.ptr.pp_double[i][j] = bndl->ptr.pp_double[i][j];
35233 0 : s->bndu.ptr.pp_double[i][j] = bndu->ptr.pp_double[i][j];
35234 : }
35235 : }
35236 0 : }
35237 :
35238 :
35239 : /*************************************************************************
35240 : This function is used to add bound constraints on the elements of the
35241 : transition matrix P.
35242 :
35243 : MCPD solver has four types of constraints which can be placed on P:
35244 : * user-specified equality constraints (optional)
35245 : * user-specified bound constraints (optional)
35246 : * user-specified general linear constraints (optional)
35247 : * basic constraints (always present):
35248 : * non-negativity: P[i,j]>=0
35249 : * consistency: every column of P sums to 1.0
35250 :
35251 : Final constraints which are passed to the underlying optimizer are
35252 : calculated as intersection of all present constraints. For example, you
35253 : may specify boundary constraint on P[0,0] and equality one:
35254 : 0.1<=P[0,0]<=0.9
35255 : P[0,0]=0.5
35256 : Such combination of constraints will be silently reduced to their
35257 : intersection, which is P[0,0]=0.5.
35258 :
35259 : This function can be used to ADD a bound constraint for one element of P
35260 : without changing constraints for other elements.
35261 :
35262 : You can also use the MCPDSetBC() function, which allows you to place bound
35263 : constraints on an arbitrary subset of elements of P. The set of constraints
35264 : is specified by the BndL/BndU matrices, which may contain an arbitrary
35265 : combination of finite numbers or infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
35266 :
35267 : These functions (MCPDSetBC and MCPDAddBC) interact as follows:
35268 : * there is internal matrix of bound constraints which is stored in the
35269 : MCPD solver
35270 : * MCPDSetBC() replaces this matrix by another one (SET)
35271 : * MCPDAddBC() modifies one element of this matrix and leaves other ones
35272 : unchanged (ADD)
35273 : * thus MCPDAddBC() call preserves all modifications done by previous
35274 : calls, while MCPDSetBC() completely discards all changes done to the
35275 : bound constraints.
35276 :
35277 : INPUT PARAMETERS:
35278 : S - solver
35279 : I - row index of element being constrained
35280 : J - column index of element being constrained
35281 : BndL - lower bound
35282 : BndU - upper bound
35283 :
35284 : -- ALGLIB --
35285 : Copyright 23.05.2010 by Bochkanov Sergey
35286 : *************************************************************************/
35287 0 : void mcpdaddbc(mcpdstate* s,
35288 : ae_int_t i,
35289 : ae_int_t j,
35290 : double bndl,
35291 : double bndu,
35292 : ae_state *_state)
35293 : {
35294 :
35295 :
35296 0 : ae_assert(i>=0, "MCPDAddBC: I<0", _state);
35297 0 : ae_assert(i<s->n, "MCPDAddBC: I>=N", _state);
35298 0 : ae_assert(j>=0, "MCPDAddBC: J<0", _state);
35299 0 : ae_assert(j<s->n, "MCPDAddBC: J>=N", _state);
35300 0 : ae_assert(ae_isfinite(bndl, _state)||ae_isneginf(bndl, _state), "MCPDAddBC: BndL is NAN or +INF", _state);
35301 0 : ae_assert(ae_isfinite(bndu, _state)||ae_isposinf(bndu, _state), "MCPDAddBC: BndU is NAN or -INF", _state);
35302 0 : s->bndl.ptr.pp_double[i][j] = bndl;
35303 0 : s->bndu.ptr.pp_double[i][j] = bndu;
35304 0 : }
35305 :
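/*
 * Editor's note: an illustrative fragment (values invented, disabled with
 * #if 0; s is an initialized mcpdstate). fp_neginf/fp_posinf are the
 * infinity constants from the alglib namespace.
 */
#if 0
alglib::mcpdaddbc(s, 1, 0, 0.1, alglib::fp_posinf);  /* 0.1 <= P[1,0] */
alglib::mcpdaddbc(s, 0, 1, alglib::fp_neginf, 0.5);  /* P[0,1] <= 0.5 */
#endif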
35306 :
35307 : /*************************************************************************
35308 : This function is used to set linear equality/inequality constraints on the
35309 : elements of the transition matrix P.
35310 :
35311 : This function can be used to set one or several general linear constraints
35312 : on the elements of P. Two types of constraints are supported:
35313 : * equality constraints
35314 : * inequality constraints (both less-or-equal and greater-or-equal)
35315 :
35316 : Coefficients of constraints are specified by matrix C (one of the
35317 : parameters). One row of C corresponds to one constraint. Because
35318 : transition matrix P has N*N elements, we need N*N columns to store all
35319 : coefficients (they are stored row by row), and one more column to store
35320 : right part - hence C has N*N+1 columns. Constraint kind is stored in the
35321 : CT array.
35322 :
35323 : Thus, I-th linear constraint is
35324 : P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
35325 : + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
35326 : + P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N]
35327 : where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).
35328 :
35329 : Your constraint may involve only some subset of P (less than N*N elements).
35330 : For example, it can be something like
35331 : P[0,0] + P[0,1] = 0.5
35332 : In this case you still should pass a matrix with N*N+1 columns, but all its
35333 : elements (except for C[0,0], C[0,1] and the right part C[0,N*N]) will be zero.
35334 :
35335 : INPUT PARAMETERS:
35336 : S - solver
35337 : C - array[K,N*N+1] - coefficients of constraints
35338 : (see above for complete description)
35339 : CT - array[K] - constraint types
35340 : (see above for complete description)
35341 : K - number of equality/inequality constraints, K>=0:
35342 : * if given, only leading K elements of C/CT are used
35343 : * if not given, automatically determined from sizes of C/CT
35344 :
35345 : -- ALGLIB --
35346 : Copyright 23.05.2010 by Bochkanov Sergey
35347 : *************************************************************************/
35348 0 : void mcpdsetlc(mcpdstate* s,
35349 : /* Real */ ae_matrix* c,
35350 : /* Integer */ ae_vector* ct,
35351 : ae_int_t k,
35352 : ae_state *_state)
35353 : {
35354 : ae_int_t i;
35355 : ae_int_t j;
35356 : ae_int_t n;
35357 :
35358 :
35359 0 : n = s->n;
35360 0 : ae_assert(c->cols>=n*n+1, "MCPDSetLC: Cols(C)<N*N+1", _state);
35361 0 : ae_assert(c->rows>=k, "MCPDSetLC: Rows(C)<K", _state);
35362 0 : ae_assert(ct->cnt>=k, "MCPDSetLC: Len(CT)<K", _state);
35363 0 : ae_assert(apservisfinitematrix(c, k, n*n+1, _state), "MCPDSetLC: C contains infinite or NaN values!", _state);
35364 0 : rmatrixsetlengthatleast(&s->c, k, n*n+1, _state);
35365 0 : ivectorsetlengthatleast(&s->ct, k, _state);
35366 0 : for(i=0; i<=k-1; i++)
35367 : {
35368 0 : for(j=0; j<=n*n; j++)
35369 : {
35370 0 : s->c.ptr.pp_double[i][j] = c->ptr.pp_double[i][j];
35371 : }
35372 0 : s->ct.ptr.p_int[i] = ct->ptr.p_int[i];
35373 : }
35374 0 : s->ccnt = k;
35375 0 : }
35376 :
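/*
 * Editor's note: an illustrative fragment (disabled with #if 0; s is an
 * initialized mcpdstate for N=2) encoding the example constraint
 *     P[0,0] + P[0,1] = 0.5
 * from the comment above. P is stored row by row, so P[i,j] maps to
 * column i*N+j, and the right part goes into column N*N.
 */
#if 0
alglib::real_2d_array c;
alglib::integer_1d_array ct;
c.setlength(1, 5);               /* N*N+1 = 5 columns for N=2 */
c[0][0] = 1.0;                   /* coefficient of P[0,0]     */
c[0][1] = 1.0;                   /* coefficient of P[0,1]     */
c[0][2] = 0.0;                   /* P[1,0] not involved       */
c[0][3] = 0.0;                   /* P[1,1] not involved       */
c[0][4] = 0.5;                   /* right part                */
ct.setlength(1);
ct[0] = 0;                       /* CT[i]=0 means "="         */
alglib::mcpdsetlc(s, c, ct, 1);
#endif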
35377 :
35378 : /*************************************************************************
35379 : This function allows you to tune the amount of Tikhonov regularization
35380 : applied to your problem.
35381 :
35382 : By default, the regularizing term is equal to r*||P-prior_P||^2, where r is
35383 : a small non-zero value, P is the transition matrix, prior_P is the identity
35384 : matrix, and ||X||^2 is the sum of squared elements of X.
35385 :
35386 : This function allows you to change coefficient r. You can also change
35387 : prior values with MCPDSetPrior() function.
35388 :
35389 : INPUT PARAMETERS:
35390 : S - solver
35391 : V - regularization coefficient, finite non-negative value. It
35392 : is not recommended to specify a zero value unless you are
35393 : pretty sure that you want it.
35394 :
35395 : -- ALGLIB --
35396 : Copyright 23.05.2010 by Bochkanov Sergey
35397 : *************************************************************************/
35398 0 : void mcpdsettikhonovregularizer(mcpdstate* s, double v, ae_state *_state)
35399 : {
35400 :
35401 :
35402 0 : ae_assert(ae_isfinite(v, _state), "MCPDSetTikhonovRegularizer: V is infinite or NAN", _state);
35403 0 : ae_assert(ae_fp_greater_eq(v,0.0), "MCPDSetTikhonovRegularizer: V is less than zero", _state);
35404 0 : s->regterm = v;
35405 0 : }
35406 :
35407 :
35408 : /*************************************************************************
35409 : This function allows you to set the prior values used for regularization
35410 : of your problem.
35411 :
35412 : By default, the regularizing term is equal to r*||P-prior_P||^2, where r is
35413 : a small non-zero value, P is the transition matrix, prior_P is the identity
35414 : matrix, and ||X||^2 is the sum of squared elements of X.
35415 :
35416 : This function allows you to change prior values prior_P. You can also
35417 : change r with MCPDSetTikhonovRegularizer() function.
35418 :
35419 : INPUT PARAMETERS:
35420 : S - solver
35421 : PP - array[N,N], matrix of prior values:
35422 : 1. elements must be real numbers from [0,1]
35423 : 2. columns must sum to 1.0.
35424 : The first property is checked (an exception is thrown otherwise),
35425 : while the second one is not checked/enforced.
35426 :
35427 : -- ALGLIB --
35428 : Copyright 23.05.2010 by Bochkanov Sergey
35429 : *************************************************************************/
35430 0 : void mcpdsetprior(mcpdstate* s,
35431 : /* Real */ ae_matrix* pp,
35432 : ae_state *_state)
35433 : {
35434 : ae_frame _frame_block;
35435 : ae_matrix _pp;
35436 : ae_int_t i;
35437 : ae_int_t j;
35438 : ae_int_t n;
35439 :
35440 0 : ae_frame_make(_state, &_frame_block);
35441 0 : memset(&_pp, 0, sizeof(_pp));
35442 0 : ae_matrix_init_copy(&_pp, pp, _state, ae_true);
35443 0 : pp = &_pp;
35444 :
35445 0 : n = s->n;
35446 0 : ae_assert(pp->cols>=n, "MCPDSetPrior: Cols(PP)<N", _state);
35447 0 : ae_assert(pp->rows>=n, "MCPDSetPrior: Rows(PP)<N", _state);
35448 0 : for(i=0; i<=n-1; i++)
35449 : {
35450 0 : for(j=0; j<=n-1; j++)
35451 : {
35452 0 : ae_assert(ae_isfinite(pp->ptr.pp_double[i][j], _state), "MCPDSetPrior: PP contains infinite elements", _state);
35453 0 : ae_assert(ae_fp_greater_eq(pp->ptr.pp_double[i][j],0.0)&&ae_fp_less_eq(pp->ptr.pp_double[i][j],1.0), "MCPDSetPrior: PP[i,j] is less than 0.0 or greater than 1.0", _state);
35454 0 : s->priorp.ptr.pp_double[i][j] = pp->ptr.pp_double[i][j];
35455 : }
35456 : }
35457 0 : ae_frame_leave(_state);
35458 0 : }
35459 :
35460 :
35461 : /*************************************************************************
35462 : This function is used to change the prediction weights.
35463 :
35464 : MCPD solver scales the prediction errors as follows:
35465 : Error(P) = ||W*(y-P*x)||^2
35466 : where
35467 : x is a system state at time t
35468 : y is a system state at time t+1
35469 : P is a transition matrix
35470 : W is a diagonal scaling matrix
35471 :
35472 : By default, weights are chosen in order to minimize the relative prediction
35473 : error instead of the absolute one. For example, if one component of the
35474 : state is about 0.5 in magnitude and another one is about 0.05, then the
35475 : algorithm will make the corresponding weights equal to 2.0 and 20.0.
35476 :
35477 : INPUT PARAMETERS:
35478 : S - solver
35479 : PW - array[N], weights:
35480 : * must be non-negative values (exception will be thrown otherwise)
35481 : * zero values will be replaced by automatically chosen values
35482 :
35483 : -- ALGLIB --
35484 : Copyright 23.05.2010 by Bochkanov Sergey
35485 : *************************************************************************/
35486 0 : void mcpdsetpredictionweights(mcpdstate* s,
35487 : /* Real */ ae_vector* pw,
35488 : ae_state *_state)
35489 : {
35490 : ae_int_t i;
35491 : ae_int_t n;
35492 :
35493 :
35494 0 : n = s->n;
35495 0 : ae_assert(pw->cnt>=n, "MCPDSetPredictionWeights: Length(PW)<N", _state);
35496 0 : for(i=0; i<=n-1; i++)
35497 : {
35498 0 : ae_assert(ae_isfinite(pw->ptr.p_double[i], _state), "MCPDSetPredictionWeights: PW contains infinite or NAN elements", _state);
35499 0 : ae_assert(ae_fp_greater_eq(pw->ptr.p_double[i],(double)(0)), "MCPDSetPredictionWeights: PW contains negative elements", _state);
35500 0 : s->pw.ptr.p_double[i] = pw->ptr.p_double[i];
35501 : }
35502 0 : }
35503 :
35504 :
35505 : /*************************************************************************
35506 : This function is used to start solution of the MCPD problem.
35507 :
35508 : After return from this function, you can use MCPDResults() to get solution
35509 : and completion code.
35510 :
35511 : -- ALGLIB --
35512 : Copyright 23.05.2010 by Bochkanov Sergey
35513 : *************************************************************************/
35514 0 : void mcpdsolve(mcpdstate* s, ae_state *_state)
35515 : {
35516 : ae_int_t n;
35517 : ae_int_t npairs;
35518 : ae_int_t ccnt;
35519 : ae_int_t i;
35520 : ae_int_t j;
35521 : ae_int_t k;
35522 : ae_int_t k2;
35523 : double v;
35524 : double vv;
35525 :
35526 :
35527 0 : n = s->n;
35528 0 : npairs = s->npairs;
35529 :
35530 : /*
35531 : * init fields of S
35532 : */
35533 0 : s->repterminationtype = 0;
35534 0 : s->repinneriterationscount = 0;
35535 0 : s->repouteriterationscount = 0;
35536 0 : s->repnfev = 0;
35537 0 : for(k=0; k<=n-1; k++)
35538 : {
35539 0 : for(k2=0; k2<=n-1; k2++)
35540 : {
35541 0 : s->p.ptr.pp_double[k][k2] = _state->v_nan;
35542 : }
35543 : }
35544 :
35545 : /*
35546 : * Generate "effective" weights for prediction and calculate preconditioner
35547 : */
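/*
 * Editor's note: for components with zero user-specified weight the
 * loop below sets EffectiveW[i] = (number of pairs with non-zero
 * target) / (sum of those targets), i.e. 1/mean(Y[i]) - the
 * relative-error weighting promised by MCPDSetPredictionWeights()
 * (with a fallback to 1.0 when Y[i] is always zero).
 */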
35548 0 : for(i=0; i<=n-1; i++)
35549 : {
35550 0 : if( ae_fp_eq(s->pw.ptr.p_double[i],(double)(0)) )
35551 : {
35552 0 : v = (double)(0);
35553 0 : k = 0;
35554 0 : for(j=0; j<=npairs-1; j++)
35555 : {
35556 0 : if( ae_fp_neq(s->data.ptr.pp_double[j][n+i],(double)(0)) )
35557 : {
35558 0 : v = v+s->data.ptr.pp_double[j][n+i];
35559 0 : k = k+1;
35560 : }
35561 : }
35562 0 : if( k!=0 )
35563 : {
35564 0 : s->effectivew.ptr.p_double[i] = k/v;
35565 : }
35566 : else
35567 : {
35568 0 : s->effectivew.ptr.p_double[i] = 1.0;
35569 : }
35570 : }
35571 : else
35572 : {
35573 0 : s->effectivew.ptr.p_double[i] = s->pw.ptr.p_double[i];
35574 : }
35575 : }
35576 0 : for(i=0; i<=n-1; i++)
35577 : {
35578 0 : for(j=0; j<=n-1; j++)
35579 : {
35580 0 : s->h.ptr.p_double[i*n+j] = 2*s->regterm;
35581 : }
35582 : }
35583 0 : for(k=0; k<=npairs-1; k++)
35584 : {
35585 0 : for(i=0; i<=n-1; i++)
35586 : {
35587 0 : for(j=0; j<=n-1; j++)
35588 : {
35589 0 : s->h.ptr.p_double[i*n+j] = s->h.ptr.p_double[i*n+j]+2*ae_sqr(s->effectivew.ptr.p_double[i], _state)*ae_sqr(s->data.ptr.pp_double[k][j], _state);
35590 : }
35591 : }
35592 : }
35593 0 : for(i=0; i<=n-1; i++)
35594 : {
35595 0 : for(j=0; j<=n-1; j++)
35596 : {
35597 0 : if( ae_fp_eq(s->h.ptr.p_double[i*n+j],(double)(0)) )
35598 : {
35599 0 : s->h.ptr.p_double[i*n+j] = (double)(1);
35600 : }
35601 : }
35602 : }
35603 :
35604 : /*
35605 : * Generate "effective" BndL/BndU
35606 : */
35607 0 : for(i=0; i<=n-1; i++)
35608 : {
35609 0 : for(j=0; j<=n-1; j++)
35610 : {
35611 :
35612 : /*
35613 : * Set default boundary constraints.
35614 : * Lower bound is always zero, upper bound is calculated
35615 : * with respect to entry/exit states.
35616 : */
35617 0 : s->effectivebndl.ptr.p_double[i*n+j] = 0.0;
35618 0 : if( s->states.ptr.p_int[i]>0||s->states.ptr.p_int[j]<0 )
35619 : {
35620 0 : s->effectivebndu.ptr.p_double[i*n+j] = 0.0;
35621 : }
35622 : else
35623 : {
35624 0 : s->effectivebndu.ptr.p_double[i*n+j] = 1.0;
35625 : }
35626 :
35627 : /*
35628 : * Calculate intersection of the default and user-specified bound constraints.
35629 : * This code checks consistency of such combination.
35630 : */
35631 0 : if( ae_isfinite(s->bndl.ptr.pp_double[i][j], _state)&&ae_fp_greater(s->bndl.ptr.pp_double[i][j],s->effectivebndl.ptr.p_double[i*n+j]) )
35632 : {
35633 0 : s->effectivebndl.ptr.p_double[i*n+j] = s->bndl.ptr.pp_double[i][j];
35634 : }
35635 0 : if( ae_isfinite(s->bndu.ptr.pp_double[i][j], _state)&&ae_fp_less(s->bndu.ptr.pp_double[i][j],s->effectivebndu.ptr.p_double[i*n+j]) )
35636 : {
35637 0 : s->effectivebndu.ptr.p_double[i*n+j] = s->bndu.ptr.pp_double[i][j];
35638 : }
35639 0 : if( ae_fp_greater(s->effectivebndl.ptr.p_double[i*n+j],s->effectivebndu.ptr.p_double[i*n+j]) )
35640 : {
35641 0 : s->repterminationtype = -3;
35642 0 : return;
35643 : }
35644 :
35645 : /*
35646 : * Calculate intersection of the effective bound constraints
35647 : * and user-specified equality constraints.
35648 : * This code checks consistency of such combination.
35649 : */
35650 0 : if( ae_isfinite(s->ec.ptr.pp_double[i][j], _state) )
35651 : {
35652 0 : if( ae_fp_less(s->ec.ptr.pp_double[i][j],s->effectivebndl.ptr.p_double[i*n+j])||ae_fp_greater(s->ec.ptr.pp_double[i][j],s->effectivebndu.ptr.p_double[i*n+j]) )
35653 : {
35654 0 : s->repterminationtype = -3;
35655 0 : return;
35656 : }
35657 0 : s->effectivebndl.ptr.p_double[i*n+j] = s->ec.ptr.pp_double[i][j];
35658 0 : s->effectivebndu.ptr.p_double[i*n+j] = s->ec.ptr.pp_double[i][j];
35659 : }
35660 : }
35661 : }
35662 :
35663 : /*
35664 : * Generate linear constraints:
35665 : * * "default" sums-to-one constraints (not generated for "exit" states)
35666 : */
35667 0 : rmatrixsetlengthatleast(&s->effectivec, s->ccnt+n, n*n+1, _state);
35668 0 : ivectorsetlengthatleast(&s->effectivect, s->ccnt+n, _state);
35669 0 : ccnt = s->ccnt;
35670 0 : for(i=0; i<=s->ccnt-1; i++)
35671 : {
35672 0 : for(j=0; j<=n*n; j++)
35673 : {
35674 0 : s->effectivec.ptr.pp_double[i][j] = s->c.ptr.pp_double[i][j];
35675 : }
35676 0 : s->effectivect.ptr.p_int[i] = s->ct.ptr.p_int[i];
35677 : }
35678 0 : for(i=0; i<=n-1; i++)
35679 : {
35680 0 : if( s->states.ptr.p_int[i]>=0 )
35681 : {
35682 0 : for(k=0; k<=n*n-1; k++)
35683 : {
35684 0 : s->effectivec.ptr.pp_double[ccnt][k] = (double)(0);
35685 : }
35686 0 : for(k=0; k<=n-1; k++)
35687 : {
35688 0 : s->effectivec.ptr.pp_double[ccnt][k*n+i] = (double)(1);
35689 : }
35690 0 : s->effectivec.ptr.pp_double[ccnt][n*n] = 1.0;
35691 0 : s->effectivect.ptr.p_int[ccnt] = 0;
35692 0 : ccnt = ccnt+1;
35693 : }
35694 : }
35695 :
35696 : /*
35697 : * create optimizer
35698 : */
35699 0 : for(i=0; i<=n-1; i++)
35700 : {
35701 0 : for(j=0; j<=n-1; j++)
35702 : {
35703 0 : s->tmpp.ptr.p_double[i*n+j] = (double)1/(double)n;
35704 : }
35705 : }
35706 0 : minbleicrestartfrom(&s->bs, &s->tmpp, _state);
35707 0 : minbleicsetbc(&s->bs, &s->effectivebndl, &s->effectivebndu, _state);
35708 0 : minbleicsetlc(&s->bs, &s->effectivec, &s->effectivect, ccnt, _state);
35709 0 : minbleicsetcond(&s->bs, 0.0, 0.0, mcpd_xtol, 0, _state);
35710 0 : minbleicsetprecdiag(&s->bs, &s->h, _state);
35711 :
35712 : /*
35713 : * solve problem
35714 : */
35715 0 : while(minbleiciteration(&s->bs, _state))
35716 : {
35717 0 : ae_assert(s->bs.needfg, "MCPDSolve: internal error", _state);
35718 0 : if( s->bs.needfg )
35719 : {
35720 :
35721 : /*
35722 : * Calculate regularization term
35723 : */
35724 0 : s->bs.f = 0.0;
35725 0 : vv = s->regterm;
35726 0 : for(i=0; i<=n-1; i++)
35727 : {
35728 0 : for(j=0; j<=n-1; j++)
35729 : {
35730 0 : s->bs.f = s->bs.f+vv*ae_sqr(s->bs.x.ptr.p_double[i*n+j]-s->priorp.ptr.pp_double[i][j], _state);
35731 0 : s->bs.g.ptr.p_double[i*n+j] = 2*vv*(s->bs.x.ptr.p_double[i*n+j]-s->priorp.ptr.pp_double[i][j]);
35732 : }
35733 : }
35734 :
35735 : /*
35736 : * calculate prediction error/gradient for K-th pair
35737 : */
35738 0 : for(k=0; k<=npairs-1; k++)
35739 : {
35740 0 : for(i=0; i<=n-1; i++)
35741 : {
35742 0 : v = ae_v_dotproduct(&s->bs.x.ptr.p_double[i*n], 1, &s->data.ptr.pp_double[k][0], 1, ae_v_len(i*n,i*n+n-1));
35743 0 : vv = s->effectivew.ptr.p_double[i];
35744 0 : s->bs.f = s->bs.f+ae_sqr(vv*(v-s->data.ptr.pp_double[k][n+i]), _state);
35745 0 : for(j=0; j<=n-1; j++)
35746 : {
35747 0 : s->bs.g.ptr.p_double[i*n+j] = s->bs.g.ptr.p_double[i*n+j]+2*vv*vv*(v-s->data.ptr.pp_double[k][n+i])*s->data.ptr.pp_double[k][j];
35748 : }
35749 : }
35750 : }
35751 :
35752 : /*
35753 : * continue
35754 : */
35755 0 : continue;
35756 : }
35757 : }
35758 0 : minbleicresultsbuf(&s->bs, &s->tmpp, &s->br, _state);
35759 0 : for(i=0; i<=n-1; i++)
35760 : {
35761 0 : for(j=0; j<=n-1; j++)
35762 : {
35763 0 : s->p.ptr.pp_double[i][j] = s->tmpp.ptr.p_double[i*n+j];
35764 : }
35765 : }
35766 0 : s->repterminationtype = s->br.terminationtype;
35767 0 : s->repinneriterationscount = s->br.inneriterationscount;
35768 0 : s->repouteriterationscount = s->br.outeriterationscount;
35769 0 : s->repnfev = s->br.nfev;
35770 : }
35771 :
35772 :
35773 : /*************************************************************************
35774 : MCPD results
35775 :
35776 : INPUT PARAMETERS:
35777 : State - algorithm state
35778 :
35779 : OUTPUT PARAMETERS:
35780 : P - array[N,N], transition matrix
35781 : Rep - optimization report. You should check Rep.TerminationType
35782 : in order to distinguish successful termination from
35783 :                 an unsuccessful one. In short, positive values denote
35784 :                 success and negative ones denote failure.
35785 : More information about fields of this structure can be
35786 : found in the comments on MCPDReport datatype.
35787 :
35788 :
35789 : -- ALGLIB --
35790 : Copyright 23.05.2010 by Bochkanov Sergey
35791 : *************************************************************************/
35792 0 : void mcpdresults(mcpdstate* s,
35793 : /* Real */ ae_matrix* p,
35794 : mcpdreport* rep,
35795 : ae_state *_state)
35796 : {
35797 : ae_int_t i;
35798 : ae_int_t j;
35799 :
35800 0 : ae_matrix_clear(p);
35801 0 : _mcpdreport_clear(rep);
35802 :
35803 0 : ae_matrix_set_length(p, s->n, s->n, _state);
35804 0 : for(i=0; i<=s->n-1; i++)
35805 : {
35806 0 : for(j=0; j<=s->n-1; j++)
35807 : {
35808 0 : p->ptr.pp_double[i][j] = s->p.ptr.pp_double[i][j];
35809 : }
35810 : }
35811 0 : rep->terminationtype = s->repterminationtype;
35812 0 : rep->inneriterationscount = s->repinneriterationscount;
35813 0 : rep->outeriterationscount = s->repouteriterationscount;
35814 0 : rep->nfev = s->repnfev;
35815 0 : }
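/*************************************************************************
Usage sketch for the MCPD pipeline implemented above. This is a minimal
example against the C++ wrappers declared in dataanalysis.h; the 2-state
track values are hypothetical:

    #include "dataanalysis.h"

    void mcpd_example()
    {
        alglib::mcpdstate s;
        alglib::mcpdreport rep;
        alglib::real_2d_array p;

        // one observed track of population shares for a 2-state chain
        alglib::real_2d_array track = "[[1.00,0.00],[0.95,0.05],[0.9025,0.0975]]";

        alglib::mcpdcreate(2, s);       // 2 states, no entry/exit states
        alglib::mcpdaddtrack(s, track); // attach observation track
        alglib::mcpdsolve(s);           // run the BLEIC-based solver
        alglib::mcpdresults(s, p, rep); // rep.terminationtype>0 on success
    }
*************************************************************************/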
35816 :
35817 :
35818 : /*************************************************************************
35819 : Internal initialization function
35820 :
35821 : -- ALGLIB --
35822 : Copyright 23.05.2010 by Bochkanov Sergey
35823 : *************************************************************************/
35824 0 : static void mcpd_mcpdinit(ae_int_t n,
35825 : ae_int_t entrystate,
35826 : ae_int_t exitstate,
35827 : mcpdstate* s,
35828 : ae_state *_state)
35829 : {
35830 : ae_int_t i;
35831 : ae_int_t j;
35832 :
35833 :
35834 0 : ae_assert(n>=1, "MCPDCreate: N<1", _state);
35835 0 : s->n = n;
35836 0 : ae_vector_set_length(&s->states, n, _state);
35837 0 : for(i=0; i<=n-1; i++)
35838 : {
35839 0 : s->states.ptr.p_int[i] = 0;
35840 : }
35841 0 : if( entrystate>=0 )
35842 : {
35843 0 : s->states.ptr.p_int[entrystate] = 1;
35844 : }
35845 0 : if( exitstate>=0 )
35846 : {
35847 0 : s->states.ptr.p_int[exitstate] = -1;
35848 : }
35849 0 : s->npairs = 0;
35850 0 : s->regterm = 1.0E-8;
35851 0 : s->ccnt = 0;
35852 0 : ae_matrix_set_length(&s->p, n, n, _state);
35853 0 : ae_matrix_set_length(&s->ec, n, n, _state);
35854 0 : ae_matrix_set_length(&s->bndl, n, n, _state);
35855 0 : ae_matrix_set_length(&s->bndu, n, n, _state);
35856 0 : ae_vector_set_length(&s->pw, n, _state);
35857 0 : ae_matrix_set_length(&s->priorp, n, n, _state);
35858 0 : ae_vector_set_length(&s->tmpp, n*n, _state);
35859 0 : ae_vector_set_length(&s->effectivew, n, _state);
35860 0 : ae_vector_set_length(&s->effectivebndl, n*n, _state);
35861 0 : ae_vector_set_length(&s->effectivebndu, n*n, _state);
35862 0 : ae_vector_set_length(&s->h, n*n, _state);
35863 0 : for(i=0; i<=n-1; i++)
35864 : {
35865 0 : for(j=0; j<=n-1; j++)
35866 : {
35867 0 : s->p.ptr.pp_double[i][j] = 0.0;
35868 0 : s->priorp.ptr.pp_double[i][j] = 0.0;
35869 0 : s->bndl.ptr.pp_double[i][j] = _state->v_neginf;
35870 0 : s->bndu.ptr.pp_double[i][j] = _state->v_posinf;
35871 0 : s->ec.ptr.pp_double[i][j] = _state->v_nan;
35872 : }
35873 0 : s->pw.ptr.p_double[i] = 0.0;
35874 0 : s->priorp.ptr.pp_double[i][i] = 1.0;
35875 : }
35876 0 : ae_matrix_set_length(&s->data, 1, 2*n, _state);
35877 0 : for(i=0; i<=2*n-1; i++)
35878 : {
35879 0 : s->data.ptr.pp_double[0][i] = 0.0;
35880 : }
35881 0 : for(i=0; i<=n*n-1; i++)
35882 : {
35883 0 : s->tmpp.ptr.p_double[i] = 0.0;
35884 : }
35885 0 : minbleiccreate(n*n, &s->tmpp, &s->bs, _state);
35886 0 : }
35887 :
35888 :
35889 0 : void _mcpdstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
35890 : {
35891 0 : mcpdstate *p = (mcpdstate*)_p;
35892 0 : ae_touch_ptr((void*)p);
35893 0 : ae_vector_init(&p->states, 0, DT_INT, _state, make_automatic);
35894 0 : ae_matrix_init(&p->data, 0, 0, DT_REAL, _state, make_automatic);
35895 0 : ae_matrix_init(&p->ec, 0, 0, DT_REAL, _state, make_automatic);
35896 0 : ae_matrix_init(&p->bndl, 0, 0, DT_REAL, _state, make_automatic);
35897 0 : ae_matrix_init(&p->bndu, 0, 0, DT_REAL, _state, make_automatic);
35898 0 : ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
35899 0 : ae_vector_init(&p->ct, 0, DT_INT, _state, make_automatic);
35900 0 : ae_vector_init(&p->pw, 0, DT_REAL, _state, make_automatic);
35901 0 : ae_matrix_init(&p->priorp, 0, 0, DT_REAL, _state, make_automatic);
35902 0 : _minbleicstate_init(&p->bs, _state, make_automatic);
35903 0 : _minbleicreport_init(&p->br, _state, make_automatic);
35904 0 : ae_vector_init(&p->tmpp, 0, DT_REAL, _state, make_automatic);
35905 0 : ae_vector_init(&p->effectivew, 0, DT_REAL, _state, make_automatic);
35906 0 : ae_vector_init(&p->effectivebndl, 0, DT_REAL, _state, make_automatic);
35907 0 : ae_vector_init(&p->effectivebndu, 0, DT_REAL, _state, make_automatic);
35908 0 : ae_matrix_init(&p->effectivec, 0, 0, DT_REAL, _state, make_automatic);
35909 0 : ae_vector_init(&p->effectivect, 0, DT_INT, _state, make_automatic);
35910 0 : ae_vector_init(&p->h, 0, DT_REAL, _state, make_automatic);
35911 0 : ae_matrix_init(&p->p, 0, 0, DT_REAL, _state, make_automatic);
35912 0 : }
35913 :
35914 :
35915 0 : void _mcpdstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
35916 : {
35917 0 : mcpdstate *dst = (mcpdstate*)_dst;
35918 0 : mcpdstate *src = (mcpdstate*)_src;
35919 0 : dst->n = src->n;
35920 0 : ae_vector_init_copy(&dst->states, &src->states, _state, make_automatic);
35921 0 : dst->npairs = src->npairs;
35922 0 : ae_matrix_init_copy(&dst->data, &src->data, _state, make_automatic);
35923 0 : ae_matrix_init_copy(&dst->ec, &src->ec, _state, make_automatic);
35924 0 : ae_matrix_init_copy(&dst->bndl, &src->bndl, _state, make_automatic);
35925 0 : ae_matrix_init_copy(&dst->bndu, &src->bndu, _state, make_automatic);
35926 0 : ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
35927 0 : ae_vector_init_copy(&dst->ct, &src->ct, _state, make_automatic);
35928 0 : dst->ccnt = src->ccnt;
35929 0 : ae_vector_init_copy(&dst->pw, &src->pw, _state, make_automatic);
35930 0 : ae_matrix_init_copy(&dst->priorp, &src->priorp, _state, make_automatic);
35931 0 : dst->regterm = src->regterm;
35932 0 : _minbleicstate_init_copy(&dst->bs, &src->bs, _state, make_automatic);
35933 0 : dst->repinneriterationscount = src->repinneriterationscount;
35934 0 : dst->repouteriterationscount = src->repouteriterationscount;
35935 0 : dst->repnfev = src->repnfev;
35936 0 : dst->repterminationtype = src->repterminationtype;
35937 0 : _minbleicreport_init_copy(&dst->br, &src->br, _state, make_automatic);
35938 0 : ae_vector_init_copy(&dst->tmpp, &src->tmpp, _state, make_automatic);
35939 0 : ae_vector_init_copy(&dst->effectivew, &src->effectivew, _state, make_automatic);
35940 0 : ae_vector_init_copy(&dst->effectivebndl, &src->effectivebndl, _state, make_automatic);
35941 0 : ae_vector_init_copy(&dst->effectivebndu, &src->effectivebndu, _state, make_automatic);
35942 0 : ae_matrix_init_copy(&dst->effectivec, &src->effectivec, _state, make_automatic);
35943 0 : ae_vector_init_copy(&dst->effectivect, &src->effectivect, _state, make_automatic);
35944 0 : ae_vector_init_copy(&dst->h, &src->h, _state, make_automatic);
35945 0 : ae_matrix_init_copy(&dst->p, &src->p, _state, make_automatic);
35946 0 : }
35947 :
35948 :
35949 0 : void _mcpdstate_clear(void* _p)
35950 : {
35951 0 : mcpdstate *p = (mcpdstate*)_p;
35952 0 : ae_touch_ptr((void*)p);
35953 0 : ae_vector_clear(&p->states);
35954 0 : ae_matrix_clear(&p->data);
35955 0 : ae_matrix_clear(&p->ec);
35956 0 : ae_matrix_clear(&p->bndl);
35957 0 : ae_matrix_clear(&p->bndu);
35958 0 : ae_matrix_clear(&p->c);
35959 0 : ae_vector_clear(&p->ct);
35960 0 : ae_vector_clear(&p->pw);
35961 0 : ae_matrix_clear(&p->priorp);
35962 0 : _minbleicstate_clear(&p->bs);
35963 0 : _minbleicreport_clear(&p->br);
35964 0 : ae_vector_clear(&p->tmpp);
35965 0 : ae_vector_clear(&p->effectivew);
35966 0 : ae_vector_clear(&p->effectivebndl);
35967 0 : ae_vector_clear(&p->effectivebndu);
35968 0 : ae_matrix_clear(&p->effectivec);
35969 0 : ae_vector_clear(&p->effectivect);
35970 0 : ae_vector_clear(&p->h);
35971 0 : ae_matrix_clear(&p->p);
35972 0 : }
35973 :
35974 :
35975 0 : void _mcpdstate_destroy(void* _p)
35976 : {
35977 0 : mcpdstate *p = (mcpdstate*)_p;
35978 0 : ae_touch_ptr((void*)p);
35979 0 : ae_vector_destroy(&p->states);
35980 0 : ae_matrix_destroy(&p->data);
35981 0 : ae_matrix_destroy(&p->ec);
35982 0 : ae_matrix_destroy(&p->bndl);
35983 0 : ae_matrix_destroy(&p->bndu);
35984 0 : ae_matrix_destroy(&p->c);
35985 0 : ae_vector_destroy(&p->ct);
35986 0 : ae_vector_destroy(&p->pw);
35987 0 : ae_matrix_destroy(&p->priorp);
35988 0 : _minbleicstate_destroy(&p->bs);
35989 0 : _minbleicreport_destroy(&p->br);
35990 0 : ae_vector_destroy(&p->tmpp);
35991 0 : ae_vector_destroy(&p->effectivew);
35992 0 : ae_vector_destroy(&p->effectivebndl);
35993 0 : ae_vector_destroy(&p->effectivebndu);
35994 0 : ae_matrix_destroy(&p->effectivec);
35995 0 : ae_vector_destroy(&p->effectivect);
35996 0 : ae_vector_destroy(&p->h);
35997 0 : ae_matrix_destroy(&p->p);
35998 0 : }
35999 :
36000 :
36001 0 : void _mcpdreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
36002 : {
36003 0 : mcpdreport *p = (mcpdreport*)_p;
36004 0 : ae_touch_ptr((void*)p);
36005 0 : }
36006 :
36007 :
36008 0 : void _mcpdreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
36009 : {
36010 0 : mcpdreport *dst = (mcpdreport*)_dst;
36011 0 : mcpdreport *src = (mcpdreport*)_src;
36012 0 : dst->inneriterationscount = src->inneriterationscount;
36013 0 : dst->outeriterationscount = src->outeriterationscount;
36014 0 : dst->nfev = src->nfev;
36015 0 : dst->terminationtype = src->terminationtype;
36016 0 : }
36017 :
36018 :
36019 0 : void _mcpdreport_clear(void* _p)
36020 : {
36021 0 : mcpdreport *p = (mcpdreport*)_p;
36022 0 : ae_touch_ptr((void*)p);
36023 0 : }
36024 :
36025 :
36026 0 : void _mcpdreport_destroy(void* _p)
36027 : {
36028 0 : mcpdreport *p = (mcpdreport*)_p;
36029 0 : ae_touch_ptr((void*)p);
36030 0 : }
36031 :
36032 :
36033 : #endif
36034 : #if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
36035 :
36036 :
36037 : /*************************************************************************
36038 : Like MLPCreate0, but for ensembles.
36039 :
36040 : -- ALGLIB --
36041 : Copyright 18.02.2009 by Bochkanov Sergey
36042 : *************************************************************************/
36043 0 : void mlpecreate0(ae_int_t nin,
36044 : ae_int_t nout,
36045 : ae_int_t ensemblesize,
36046 : mlpensemble* ensemble,
36047 : ae_state *_state)
36048 : {
36049 : ae_frame _frame_block;
36050 : multilayerperceptron net;
36051 :
36052 0 : ae_frame_make(_state, &_frame_block);
36053 0 : memset(&net, 0, sizeof(net));
36054 0 : _mlpensemble_clear(ensemble);
36055 0 : _multilayerperceptron_init(&net, _state, ae_true);
36056 :
36057 0 : mlpcreate0(nin, nout, &net, _state);
36058 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36059 0 : ae_frame_leave(_state);
36060 0 : }
36061 :
36062 :
36063 : /*************************************************************************
36064 : Like MLPCreate1, but for ensembles.
36065 :
36066 : -- ALGLIB --
36067 : Copyright 18.02.2009 by Bochkanov Sergey
36068 : *************************************************************************/
36069 0 : void mlpecreate1(ae_int_t nin,
36070 : ae_int_t nhid,
36071 : ae_int_t nout,
36072 : ae_int_t ensemblesize,
36073 : mlpensemble* ensemble,
36074 : ae_state *_state)
36075 : {
36076 : ae_frame _frame_block;
36077 : multilayerperceptron net;
36078 :
36079 0 : ae_frame_make(_state, &_frame_block);
36080 0 : memset(&net, 0, sizeof(net));
36081 0 : _mlpensemble_clear(ensemble);
36082 0 : _multilayerperceptron_init(&net, _state, ae_true);
36083 :
36084 0 : mlpcreate1(nin, nhid, nout, &net, _state);
36085 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36086 0 : ae_frame_leave(_state);
36087 0 : }
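/*************************************************************************
Construction sketch for the MLPECreateX family above, assuming the C++
wrappers from dataanalysis.h (layer sizes and ensemble size hypothetical):

    alglib::mlpensemble e0, e1;

    // ensemble of 10 linear networks: 4 inputs, 1 output
    alglib::mlpecreate0(4, 1, 10, e0);

    // ensemble of 10 networks with one hidden layer: 4-8-1 architecture
    alglib::mlpecreate1(4, 8, 1, 10, e1);
*************************************************************************/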
36088 :
36089 :
36090 : /*************************************************************************
36091 : Like MLPCreate2, but for ensembles.
36092 :
36093 : -- ALGLIB --
36094 : Copyright 18.02.2009 by Bochkanov Sergey
36095 : *************************************************************************/
36096 0 : void mlpecreate2(ae_int_t nin,
36097 : ae_int_t nhid1,
36098 : ae_int_t nhid2,
36099 : ae_int_t nout,
36100 : ae_int_t ensemblesize,
36101 : mlpensemble* ensemble,
36102 : ae_state *_state)
36103 : {
36104 : ae_frame _frame_block;
36105 : multilayerperceptron net;
36106 :
36107 0 : ae_frame_make(_state, &_frame_block);
36108 0 : memset(&net, 0, sizeof(net));
36109 0 : _mlpensemble_clear(ensemble);
36110 0 : _multilayerperceptron_init(&net, _state, ae_true);
36111 :
36112 0 : mlpcreate2(nin, nhid1, nhid2, nout, &net, _state);
36113 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36114 0 : ae_frame_leave(_state);
36115 0 : }
36116 :
36117 :
36118 : /*************************************************************************
36119 : Like MLPCreateB0, but for ensembles.
36120 :
36121 : -- ALGLIB --
36122 : Copyright 18.02.2009 by Bochkanov Sergey
36123 : *************************************************************************/
36124 0 : void mlpecreateb0(ae_int_t nin,
36125 : ae_int_t nout,
36126 : double b,
36127 : double d,
36128 : ae_int_t ensemblesize,
36129 : mlpensemble* ensemble,
36130 : ae_state *_state)
36131 : {
36132 : ae_frame _frame_block;
36133 : multilayerperceptron net;
36134 :
36135 0 : ae_frame_make(_state, &_frame_block);
36136 0 : memset(&net, 0, sizeof(net));
36137 0 : _mlpensemble_clear(ensemble);
36138 0 : _multilayerperceptron_init(&net, _state, ae_true);
36139 :
36140 0 : mlpcreateb0(nin, nout, b, d, &net, _state);
36141 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36142 0 : ae_frame_leave(_state);
36143 0 : }
36144 :
36145 :
36146 : /*************************************************************************
36147 : Like MLPCreateB1, but for ensembles.
36148 :
36149 : -- ALGLIB --
36150 : Copyright 18.02.2009 by Bochkanov Sergey
36151 : *************************************************************************/
36152 0 : void mlpecreateb1(ae_int_t nin,
36153 : ae_int_t nhid,
36154 : ae_int_t nout,
36155 : double b,
36156 : double d,
36157 : ae_int_t ensemblesize,
36158 : mlpensemble* ensemble,
36159 : ae_state *_state)
36160 : {
36161 : ae_frame _frame_block;
36162 : multilayerperceptron net;
36163 :
36164 0 : ae_frame_make(_state, &_frame_block);
36165 0 : memset(&net, 0, sizeof(net));
36166 0 : _mlpensemble_clear(ensemble);
36167 0 : _multilayerperceptron_init(&net, _state, ae_true);
36168 :
36169 0 : mlpcreateb1(nin, nhid, nout, b, d, &net, _state);
36170 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36171 0 : ae_frame_leave(_state);
36172 0 : }
36173 :
36174 :
36175 : /*************************************************************************
36176 : Like MLPCreateB2, but for ensembles.
36177 :
36178 : -- ALGLIB --
36179 : Copyright 18.02.2009 by Bochkanov Sergey
36180 : *************************************************************************/
36181 0 : void mlpecreateb2(ae_int_t nin,
36182 : ae_int_t nhid1,
36183 : ae_int_t nhid2,
36184 : ae_int_t nout,
36185 : double b,
36186 : double d,
36187 : ae_int_t ensemblesize,
36188 : mlpensemble* ensemble,
36189 : ae_state *_state)
36190 : {
36191 : ae_frame _frame_block;
36192 : multilayerperceptron net;
36193 :
36194 0 : ae_frame_make(_state, &_frame_block);
36195 0 : memset(&net, 0, sizeof(net));
36196 0 : _mlpensemble_clear(ensemble);
36197 0 : _multilayerperceptron_init(&net, _state, ae_true);
36198 :
36199 0 : mlpcreateb2(nin, nhid1, nhid2, nout, b, d, &net, _state);
36200 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36201 0 : ae_frame_leave(_state);
36202 0 : }
36203 :
36204 :
36205 : /*************************************************************************
36206 : Like MLPCreateR0, but for ensembles.
36207 :
36208 : -- ALGLIB --
36209 : Copyright 18.02.2009 by Bochkanov Sergey
36210 : *************************************************************************/
36211 0 : void mlpecreater0(ae_int_t nin,
36212 : ae_int_t nout,
36213 : double a,
36214 : double b,
36215 : ae_int_t ensemblesize,
36216 : mlpensemble* ensemble,
36217 : ae_state *_state)
36218 : {
36219 : ae_frame _frame_block;
36220 : multilayerperceptron net;
36221 :
36222 0 : ae_frame_make(_state, &_frame_block);
36223 0 : memset(&net, 0, sizeof(net));
36224 0 : _mlpensemble_clear(ensemble);
36225 0 : _multilayerperceptron_init(&net, _state, ae_true);
36226 :
36227 0 : mlpcreater0(nin, nout, a, b, &net, _state);
36228 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36229 0 : ae_frame_leave(_state);
36230 0 : }
36231 :
36232 :
36233 : /*************************************************************************
36234 : Like MLPCreateR1, but for ensembles.
36235 :
36236 : -- ALGLIB --
36237 : Copyright 18.02.2009 by Bochkanov Sergey
36238 : *************************************************************************/
36239 0 : void mlpecreater1(ae_int_t nin,
36240 : ae_int_t nhid,
36241 : ae_int_t nout,
36242 : double a,
36243 : double b,
36244 : ae_int_t ensemblesize,
36245 : mlpensemble* ensemble,
36246 : ae_state *_state)
36247 : {
36248 : ae_frame _frame_block;
36249 : multilayerperceptron net;
36250 :
36251 0 : ae_frame_make(_state, &_frame_block);
36252 0 : memset(&net, 0, sizeof(net));
36253 0 : _mlpensemble_clear(ensemble);
36254 0 : _multilayerperceptron_init(&net, _state, ae_true);
36255 :
36256 0 : mlpcreater1(nin, nhid, nout, a, b, &net, _state);
36257 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36258 0 : ae_frame_leave(_state);
36259 0 : }
36260 :
36261 :
36262 : /*************************************************************************
36263 : Like MLPCreateR2, but for ensembles.
36264 :
36265 : -- ALGLIB --
36266 : Copyright 18.02.2009 by Bochkanov Sergey
36267 : *************************************************************************/
36268 0 : void mlpecreater2(ae_int_t nin,
36269 : ae_int_t nhid1,
36270 : ae_int_t nhid2,
36271 : ae_int_t nout,
36272 : double a,
36273 : double b,
36274 : ae_int_t ensemblesize,
36275 : mlpensemble* ensemble,
36276 : ae_state *_state)
36277 : {
36278 : ae_frame _frame_block;
36279 : multilayerperceptron net;
36280 :
36281 0 : ae_frame_make(_state, &_frame_block);
36282 0 : memset(&net, 0, sizeof(net));
36283 0 : _mlpensemble_clear(ensemble);
36284 0 : _multilayerperceptron_init(&net, _state, ae_true);
36285 :
36286 0 : mlpcreater2(nin, nhid1, nhid2, nout, a, b, &net, _state);
36287 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36288 0 : ae_frame_leave(_state);
36289 0 : }
36290 :
36291 :
36292 : /*************************************************************************
36293 : Like MLPCreateC0, but for ensembles.
36294 :
36295 : -- ALGLIB --
36296 : Copyright 18.02.2009 by Bochkanov Sergey
36297 : *************************************************************************/
36298 0 : void mlpecreatec0(ae_int_t nin,
36299 : ae_int_t nout,
36300 : ae_int_t ensemblesize,
36301 : mlpensemble* ensemble,
36302 : ae_state *_state)
36303 : {
36304 : ae_frame _frame_block;
36305 : multilayerperceptron net;
36306 :
36307 0 : ae_frame_make(_state, &_frame_block);
36308 0 : memset(&net, 0, sizeof(net));
36309 0 : _mlpensemble_clear(ensemble);
36310 0 : _multilayerperceptron_init(&net, _state, ae_true);
36311 :
36312 0 : mlpcreatec0(nin, nout, &net, _state);
36313 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36314 0 : ae_frame_leave(_state);
36315 0 : }
36316 :
36317 :
36318 : /*************************************************************************
36319 : Like MLPCreateC1, but for ensembles.
36320 :
36321 : -- ALGLIB --
36322 : Copyright 18.02.2009 by Bochkanov Sergey
36323 : *************************************************************************/
36324 0 : void mlpecreatec1(ae_int_t nin,
36325 : ae_int_t nhid,
36326 : ae_int_t nout,
36327 : ae_int_t ensemblesize,
36328 : mlpensemble* ensemble,
36329 : ae_state *_state)
36330 : {
36331 : ae_frame _frame_block;
36332 : multilayerperceptron net;
36333 :
36334 0 : ae_frame_make(_state, &_frame_block);
36335 0 : memset(&net, 0, sizeof(net));
36336 0 : _mlpensemble_clear(ensemble);
36337 0 : _multilayerperceptron_init(&net, _state, ae_true);
36338 :
36339 0 : mlpcreatec1(nin, nhid, nout, &net, _state);
36340 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36341 0 : ae_frame_leave(_state);
36342 0 : }
36343 :
36344 :
36345 : /*************************************************************************
36346 : Like MLPCreateC2, but for ensembles.
36347 :
36348 : -- ALGLIB --
36349 : Copyright 18.02.2009 by Bochkanov Sergey
36350 : *************************************************************************/
36351 0 : void mlpecreatec2(ae_int_t nin,
36352 : ae_int_t nhid1,
36353 : ae_int_t nhid2,
36354 : ae_int_t nout,
36355 : ae_int_t ensemblesize,
36356 : mlpensemble* ensemble,
36357 : ae_state *_state)
36358 : {
36359 : ae_frame _frame_block;
36360 : multilayerperceptron net;
36361 :
36362 0 : ae_frame_make(_state, &_frame_block);
36363 0 : memset(&net, 0, sizeof(net));
36364 0 : _mlpensemble_clear(ensemble);
36365 0 : _multilayerperceptron_init(&net, _state, ae_true);
36366 :
36367 0 : mlpcreatec2(nin, nhid1, nhid2, nout, &net, _state);
36368 0 : mlpecreatefromnetwork(&net, ensemblesize, ensemble, _state);
36369 0 : ae_frame_leave(_state);
36370 0 : }
36371 :
36372 :
36373 : /*************************************************************************
36374 : Creates ensemble from network. Only network geometry is copied.
36375 :
36376 : -- ALGLIB --
36377 : Copyright 17.02.2009 by Bochkanov Sergey
36378 : *************************************************************************/
36379 0 : void mlpecreatefromnetwork(multilayerperceptron* network,
36380 : ae_int_t ensemblesize,
36381 : mlpensemble* ensemble,
36382 : ae_state *_state)
36383 : {
36384 : ae_int_t i;
36385 : ae_int_t ccount;
36386 : ae_int_t wcount;
36387 :
36388 0 : _mlpensemble_clear(ensemble);
36389 :
36390 0 : ae_assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!", _state);
36391 :
36392 : /*
36393 : * Copy network
36394 : */
36395 0 : mlpcopy(network, &ensemble->network, _state);
36396 :
36397 : /*
36398 : * network properties
36399 : */
36400 0 : if( mlpissoftmax(network, _state) )
36401 : {
36402 0 : ccount = mlpgetinputscount(&ensemble->network, _state);
36403 : }
36404 : else
36405 : {
36406 0 : ccount = mlpgetinputscount(&ensemble->network, _state)+mlpgetoutputscount(&ensemble->network, _state);
36407 : }
36408 0 : wcount = mlpgetweightscount(&ensemble->network, _state);
36409 0 : ensemble->ensemblesize = ensemblesize;
36410 :
36411 : /*
36412 : * weights, means, sigmas
36413 : */
36414 0 : ae_vector_set_length(&ensemble->weights, ensemblesize*wcount, _state);
36415 0 : ae_vector_set_length(&ensemble->columnmeans, ensemblesize*ccount, _state);
36416 0 : ae_vector_set_length(&ensemble->columnsigmas, ensemblesize*ccount, _state);
36417 0 : for(i=0; i<=ensemblesize*wcount-1; i++)
36418 : {
36419 0 : ensemble->weights.ptr.p_double[i] = ae_randomreal(_state)-0.5;
36420 : }
36421 0 : for(i=0; i<=ensemblesize-1; i++)
36422 : {
36423 0 : ae_v_move(&ensemble->columnmeans.ptr.p_double[i*ccount], 1, &network->columnmeans.ptr.p_double[0], 1, ae_v_len(i*ccount,(i+1)*ccount-1));
36424 0 : ae_v_move(&ensemble->columnsigmas.ptr.p_double[i*ccount], 1, &network->columnsigmas.ptr.p_double[0], 1, ae_v_len(i*ccount,(i+1)*ccount-1));
36425 : }
36426 :
36427 : /*
36428 : * temporaries, internal buffers
36429 : */
36430 0 : ae_vector_set_length(&ensemble->y, mlpgetoutputscount(&ensemble->network, _state), _state);
36431 0 : }
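/*************************************************************************
Sketch: cloning an existing network geometry into an ensemble, assuming
the C++ wrappers (the 3-5-2 softmax geometry is hypothetical):

    alglib::multilayerperceptron net;
    alglib::mlpensemble ensemble;

    alglib::mlpcreatec1(3, 5, 2, net);                // softmax classifier
    alglib::mlpecreatefromnetwork(net, 20, ensemble); // 20 members, same geometry
*************************************************************************/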
36432 :
36433 :
36434 : /*************************************************************************
36435 : Copying of MLPEnsemble structure
36436 :
36437 : INPUT PARAMETERS:
36438 : Ensemble1 - original
36439 :
36440 : OUTPUT PARAMETERS:
36441 : Ensemble2 - copy
36442 :
36443 : -- ALGLIB --
36444 : Copyright 17.02.2009 by Bochkanov Sergey
36445 : *************************************************************************/
36446 0 : void mlpecopy(mlpensemble* ensemble1,
36447 : mlpensemble* ensemble2,
36448 : ae_state *_state)
36449 : {
36450 : ae_int_t ccount;
36451 : ae_int_t wcount;
36452 :
36453 0 : _mlpensemble_clear(ensemble2);
36454 :
36455 :
36456 : /*
36457 : * Unload info
36458 : */
36459 0 : if( mlpissoftmax(&ensemble1->network, _state) )
36460 : {
36461 0 : ccount = mlpgetinputscount(&ensemble1->network, _state);
36462 : }
36463 : else
36464 : {
36465 0 : ccount = mlpgetinputscount(&ensemble1->network, _state)+mlpgetoutputscount(&ensemble1->network, _state);
36466 : }
36467 0 : wcount = mlpgetweightscount(&ensemble1->network, _state);
36468 :
36469 : /*
36470 : * Allocate space
36471 : */
36472 0 : ae_vector_set_length(&ensemble2->weights, ensemble1->ensemblesize*wcount, _state);
36473 0 : ae_vector_set_length(&ensemble2->columnmeans, ensemble1->ensemblesize*ccount, _state);
36474 0 : ae_vector_set_length(&ensemble2->columnsigmas, ensemble1->ensemblesize*ccount, _state);
36475 0 : ae_vector_set_length(&ensemble2->y, mlpgetoutputscount(&ensemble1->network, _state), _state);
36476 :
36477 : /*
36478 : * Copy
36479 : */
36480 0 : ensemble2->ensemblesize = ensemble1->ensemblesize;
36481 0 : ae_v_move(&ensemble2->weights.ptr.p_double[0], 1, &ensemble1->weights.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*wcount-1));
36482 0 : ae_v_move(&ensemble2->columnmeans.ptr.p_double[0], 1, &ensemble1->columnmeans.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*ccount-1));
36483 0 : ae_v_move(&ensemble2->columnsigmas.ptr.p_double[0], 1, &ensemble1->columnsigmas.ptr.p_double[0], 1, ae_v_len(0,ensemble1->ensemblesize*ccount-1));
36484 0 : mlpcopy(&ensemble1->network, &ensemble2->network, _state);
36485 0 : }
36486 :
36487 :
36488 : /*************************************************************************
36489 : Randomization of MLP ensemble
36490 :
36491 : -- ALGLIB --
36492 : Copyright 17.02.2009 by Bochkanov Sergey
36493 : *************************************************************************/
36494 0 : void mlperandomize(mlpensemble* ensemble, ae_state *_state)
36495 : {
36496 : ae_int_t i;
36497 : ae_int_t wcount;
36498 :
36499 :
36500 0 : wcount = mlpgetweightscount(&ensemble->network, _state);
36501 0 : for(i=0; i<=ensemble->ensemblesize*wcount-1; i++)
36502 : {
36503 0 : ensemble->weights.ptr.p_double[i] = ae_randomreal(_state)-0.5;
36504 : }
36505 0 : }
36506 :
36507 :
36508 : /*************************************************************************
36509 : Return ensemble properties (number of inputs and outputs).
36510 :
36511 : -- ALGLIB --
36512 : Copyright 17.02.2009 by Bochkanov Sergey
36513 : *************************************************************************/
36514 0 : void mlpeproperties(mlpensemble* ensemble,
36515 : ae_int_t* nin,
36516 : ae_int_t* nout,
36517 : ae_state *_state)
36518 : {
36519 :
36520 0 : *nin = 0;
36521 0 : *nout = 0;
36522 :
36523 0 : *nin = mlpgetinputscount(&ensemble->network, _state);
36524 0 : *nout = mlpgetoutputscount(&ensemble->network, _state);
36525 0 : }
36526 :
36527 :
36528 : /*************************************************************************
36529 : Return normalization type (whether ensemble is SOFTMAX-normalized or not).
36530 :
36531 : -- ALGLIB --
36532 : Copyright 17.02.2009 by Bochkanov Sergey
36533 : *************************************************************************/
36534 0 : ae_bool mlpeissoftmax(mlpensemble* ensemble, ae_state *_state)
36535 : {
36536 : ae_bool result;
36537 :
36538 :
36539 0 : result = mlpissoftmax(&ensemble->network, _state);
36540 0 : return result;
36541 : }
36542 :
36543 :
36544 : /*************************************************************************
36545 : Processing
36546 :
36547 : INPUT PARAMETERS:
36548 : Ensemble- neural networks ensemble
36549 : X - input vector, array[0..NIn-1].
36550 : Y - (possibly) preallocated buffer; if size of Y is less than
36551 : NOut, it will be reallocated. If it is large enough, it
36552 :                 is NOT reallocated, which saves time on repeated calls.
36553 :
36554 :
36555 : OUTPUT PARAMETERS:
36556 : Y - result. Regression estimate when solving regression task,
36557 : vector of posterior probabilities for classification task.
36558 :
36559 : -- ALGLIB --
36560 : Copyright 17.02.2009 by Bochkanov Sergey
36561 : *************************************************************************/
36562 0 : void mlpeprocess(mlpensemble* ensemble,
36563 : /* Real */ ae_vector* x,
36564 : /* Real */ ae_vector* y,
36565 : ae_state *_state)
36566 : {
36567 : ae_int_t i;
36568 : ae_int_t es;
36569 : ae_int_t wc;
36570 : ae_int_t cc;
36571 : double v;
36572 : ae_int_t nout;
36573 :
36574 :
36575 0 : if( y->cnt<mlpgetoutputscount(&ensemble->network, _state) )
36576 : {
36577 0 : ae_vector_set_length(y, mlpgetoutputscount(&ensemble->network, _state), _state);
36578 : }
36579 0 : es = ensemble->ensemblesize;
36580 0 : wc = mlpgetweightscount(&ensemble->network, _state);
36581 0 : if( mlpissoftmax(&ensemble->network, _state) )
36582 : {
36583 0 : cc = mlpgetinputscount(&ensemble->network, _state);
36584 : }
36585 : else
36586 : {
36587 0 : cc = mlpgetinputscount(&ensemble->network, _state)+mlpgetoutputscount(&ensemble->network, _state);
36588 : }
36589 0 : v = (double)1/(double)es;
36590 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
36591 0 : for(i=0; i<=nout-1; i++)
36592 : {
36593 0 : y->ptr.p_double[i] = (double)(0);
36594 : }
36595 0 : for(i=0; i<=es-1; i++)
36596 : {
36597 0 : ae_v_move(&ensemble->network.weights.ptr.p_double[0], 1, &ensemble->weights.ptr.p_double[i*wc], 1, ae_v_len(0,wc-1));
36598 0 : ae_v_move(&ensemble->network.columnmeans.ptr.p_double[0], 1, &ensemble->columnmeans.ptr.p_double[i*cc], 1, ae_v_len(0,cc-1));
36599 0 : ae_v_move(&ensemble->network.columnsigmas.ptr.p_double[0], 1, &ensemble->columnsigmas.ptr.p_double[i*cc], 1, ae_v_len(0,cc-1));
36600 0 : mlpprocess(&ensemble->network, x, &ensemble->y, _state);
36601 0 : ae_v_addd(&y->ptr.p_double[0], 1, &ensemble->y.ptr.p_double[0], 1, ae_v_len(0,nout-1), v);
36602 : }
36603 0 : }
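/*************************************************************************
Processing sketch, assuming the C++ wrappers (input values hypothetical).
As implemented above, the ensemble output is the plain average of the
member outputs:

    alglib::real_1d_array x = "[0.1, 0.2, 0.3]";
    alglib::real_1d_array y;               // reallocated only if too small
    alglib::mlpeprocess(ensemble, x, y);   // y = average of member outputs
*************************************************************************/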
36604 :
36605 :
36606 : /*************************************************************************
36607 : 'Interactive' variant of MLPEProcess for languages like Python which
36608 : support constructs like "Y = MLPEProcess(LM,X)" and an interactive
36609 : interpreter mode.
36610 :
36611 : This function allocates a new array on each call, so it is significantly
36612 : slower than its 'non-interactive' counterpart, but it is more convenient
36613 : when you call it from the command line.
36614 :
36615 : -- ALGLIB --
36616 : Copyright 17.02.2009 by Bochkanov Sergey
36617 : *************************************************************************/
36618 0 : void mlpeprocessi(mlpensemble* ensemble,
36619 : /* Real */ ae_vector* x,
36620 : /* Real */ ae_vector* y,
36621 : ae_state *_state)
36622 : {
36623 :
36624 0 : ae_vector_clear(y);
36625 :
36626 0 : mlpeprocess(ensemble, x, y, _state);
36627 0 : }
36628 :
36629 :
36630 : /*************************************************************************
36631 : Calculation of all types of errors (internal; handles dense (DataSetType=0) or sparse (DataSetType=1) datasets, and direct (SubsetType=0) or Idx-indexed (SubsetType=1) subsets)
36632 :
36633 : -- ALGLIB --
36634 : Copyright 17.02.2009 by Bochkanov Sergey
36635 : *************************************************************************/
36636 0 : void mlpeallerrorsx(mlpensemble* ensemble,
36637 : /* Real */ ae_matrix* densexy,
36638 : sparsematrix* sparsexy,
36639 : ae_int_t datasetsize,
36640 : ae_int_t datasettype,
36641 : /* Integer */ ae_vector* idx,
36642 : ae_int_t subset0,
36643 : ae_int_t subset1,
36644 : ae_int_t subsettype,
36645 : ae_shared_pool* buf,
36646 : modelerrors* rep,
36647 : ae_state *_state)
36648 : {
36649 : ae_frame _frame_block;
36650 : ae_int_t i;
36651 : ae_int_t j;
36652 : ae_int_t nin;
36653 : ae_int_t nout;
36654 : ae_bool iscls;
36655 : ae_int_t srcidx;
36656 : mlpbuffers *pbuf;
36657 : ae_smart_ptr _pbuf;
36658 : modelerrors rep0;
36659 : modelerrors rep1;
36660 :
36661 0 : ae_frame_make(_state, &_frame_block);
36662 0 : memset(&_pbuf, 0, sizeof(_pbuf));
36663 0 : memset(&rep0, 0, sizeof(rep0));
36664 0 : memset(&rep1, 0, sizeof(rep1));
36665 0 : ae_smart_ptr_init(&_pbuf, (void**)&pbuf, _state, ae_true);
36666 0 : _modelerrors_init(&rep0, _state, ae_true);
36667 0 : _modelerrors_init(&rep1, _state, ae_true);
36668 :
36669 :
36670 : /*
36671 : * Get network information
36672 : */
36673 0 : nin = mlpgetinputscount(&ensemble->network, _state);
36674 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
36675 0 : iscls = mlpissoftmax(&ensemble->network, _state);
36676 :
36677 : /*
36678 : * Retrieve buffer, prepare, process data, recycle buffer
36679 : */
36680 0 : ae_shared_pool_retrieve(buf, &_pbuf, _state);
36681 0 : if( iscls )
36682 : {
36683 0 : dserrallocate(nout, &pbuf->tmp0, _state);
36684 : }
36685 : else
36686 : {
36687 0 : dserrallocate(-nout, &pbuf->tmp0, _state);
36688 : }
36689 0 : rvectorsetlengthatleast(&pbuf->x, nin, _state);
36690 0 : rvectorsetlengthatleast(&pbuf->y, nout, _state);
36691 0 : rvectorsetlengthatleast(&pbuf->desiredy, nout, _state);
36692 0 : for(i=subset0; i<=subset1-1; i++)
36693 : {
36694 0 : srcidx = -1;
36695 0 : if( subsettype==0 )
36696 : {
36697 0 : srcidx = i;
36698 : }
36699 0 : if( subsettype==1 )
36700 : {
36701 0 : srcidx = idx->ptr.p_int[i];
36702 : }
36703 0 : ae_assert(srcidx>=0, "MLPEAllErrorsX: internal error", _state);
36704 0 : if( datasettype==0 )
36705 : {
36706 0 : ae_v_move(&pbuf->x.ptr.p_double[0], 1, &densexy->ptr.pp_double[srcidx][0], 1, ae_v_len(0,nin-1));
36707 : }
36708 0 : if( datasettype==1 )
36709 : {
36710 0 : sparsegetrow(sparsexy, srcidx, &pbuf->x, _state);
36711 : }
36712 0 : mlpeprocess(ensemble, &pbuf->x, &pbuf->y, _state);
36713 0 : if( mlpissoftmax(&ensemble->network, _state) )
36714 : {
36715 0 : if( datasettype==0 )
36716 : {
36717 0 : pbuf->desiredy.ptr.p_double[0] = densexy->ptr.pp_double[srcidx][nin];
36718 : }
36719 0 : if( datasettype==1 )
36720 : {
36721 0 : pbuf->desiredy.ptr.p_double[0] = sparseget(sparsexy, srcidx, nin, _state);
36722 : }
36723 : }
36724 : else
36725 : {
36726 0 : if( datasettype==0 )
36727 : {
36728 0 : ae_v_move(&pbuf->desiredy.ptr.p_double[0], 1, &densexy->ptr.pp_double[srcidx][nin], 1, ae_v_len(0,nout-1));
36729 : }
36730 0 : if( datasettype==1 )
36731 : {
36732 0 : for(j=0; j<=nout-1; j++)
36733 : {
36734 0 : pbuf->desiredy.ptr.p_double[j] = sparseget(sparsexy, srcidx, nin+j, _state);
36735 : }
36736 : }
36737 : }
36738 0 : dserraccumulate(&pbuf->tmp0, &pbuf->y, &pbuf->desiredy, _state);
36739 : }
36740 0 : dserrfinish(&pbuf->tmp0, _state);
36741 0 : rep->relclserror = pbuf->tmp0.ptr.p_double[0];
36742 0 : rep->avgce = pbuf->tmp0.ptr.p_double[1]/ae_log((double)(2), _state);
36743 0 : rep->rmserror = pbuf->tmp0.ptr.p_double[2];
36744 0 : rep->avgerror = pbuf->tmp0.ptr.p_double[3];
36745 0 : rep->avgrelerror = pbuf->tmp0.ptr.p_double[4];
36746 0 : ae_shared_pool_recycle(buf, &_pbuf, _state);
36747 0 : ae_frame_leave(_state);
36748 0 : }
36749 :
36750 :
36751 : /*************************************************************************
36752 : Calculation of all types of errors on dataset given by sparse matrix
36753 :
36754 : -- ALGLIB --
36755 : Copyright 10.09.2012 by Bochkanov Sergey
36756 : *************************************************************************/
36757 0 : void mlpeallerrorssparse(mlpensemble* ensemble,
36758 : sparsematrix* xy,
36759 : ae_int_t npoints,
36760 : double* relcls,
36761 : double* avgce,
36762 : double* rms,
36763 : double* avg,
36764 : double* avgrel,
36765 : ae_state *_state)
36766 : {
36767 : ae_frame _frame_block;
36768 : ae_int_t i;
36769 : ae_vector buf;
36770 : ae_vector workx;
36771 : ae_vector y;
36772 : ae_vector dy;
36773 : ae_int_t nin;
36774 : ae_int_t nout;
36775 :
36776 0 : ae_frame_make(_state, &_frame_block);
36777 0 : memset(&buf, 0, sizeof(buf));
36778 0 : memset(&workx, 0, sizeof(workx));
36779 0 : memset(&y, 0, sizeof(y));
36780 0 : memset(&dy, 0, sizeof(dy));
36781 0 : *relcls = 0;
36782 0 : *avgce = 0;
36783 0 : *rms = 0;
36784 0 : *avg = 0;
36785 0 : *avgrel = 0;
36786 0 : ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
36787 0 : ae_vector_init(&workx, 0, DT_REAL, _state, ae_true);
36788 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
36789 0 : ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
36790 :
36791 0 : nin = mlpgetinputscount(&ensemble->network, _state);
36792 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
36793 0 : if( mlpissoftmax(&ensemble->network, _state) )
36794 : {
36795 0 : ae_vector_set_length(&dy, 1, _state);
36796 0 : dserrallocate(nout, &buf, _state);
36797 : }
36798 : else
36799 : {
36800 0 : ae_vector_set_length(&dy, nout, _state);
36801 0 : dserrallocate(-nout, &buf, _state);
36802 : }
36803 0 : for(i=0; i<=npoints-1; i++)
36804 : {
36805 0 : sparsegetrow(xy, i, &workx, _state);
36806 0 : mlpeprocess(ensemble, &workx, &y, _state);
36807 0 : if( mlpissoftmax(&ensemble->network, _state) )
36808 : {
36809 0 : dy.ptr.p_double[0] = workx.ptr.p_double[nin];
36810 : }
36811 : else
36812 : {
36813 0 : ae_v_move(&dy.ptr.p_double[0], 1, &workx.ptr.p_double[nin], 1, ae_v_len(0,nout-1));
36814 : }
36815 0 : dserraccumulate(&buf, &y, &dy, _state);
36816 : }
36817 0 : dserrfinish(&buf, _state);
36818 0 : *relcls = buf.ptr.p_double[0];
36819 0 : *avgce = buf.ptr.p_double[1];
36820 0 : *rms = buf.ptr.p_double[2];
36821 0 : *avg = buf.ptr.p_double[3];
36822 0 : *avgrel = buf.ptr.p_double[4];
36823 0 : ae_frame_leave(_state);
36824 0 : }
36825 :
36826 :
36827 : /*************************************************************************
36828 : Relative classification error on the test set
36829 :
36830 : INPUT PARAMETERS:
36831 : Ensemble- ensemble
36832 : XY - test set
36833 : NPoints - test set size
36834 :
36835 : RESULT:
36836 : percent of incorrectly classified cases.
36837 :     Works both for classifier networks and for regression networks which
36838 :     are used as classifiers.
36839 :
36840 : -- ALGLIB --
36841 : Copyright 17.02.2009 by Bochkanov Sergey
36842 : *************************************************************************/
36843 0 : double mlperelclserror(mlpensemble* ensemble,
36844 : /* Real */ ae_matrix* xy,
36845 : ae_int_t npoints,
36846 : ae_state *_state)
36847 : {
36848 : ae_frame _frame_block;
36849 : modelerrors rep;
36850 : double result;
36851 :
36852 0 : ae_frame_make(_state, &_frame_block);
36853 0 : memset(&rep, 0, sizeof(rep));
36854 0 : _modelerrors_init(&rep, _state, ae_true);
36855 :
36856 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
36857 0 : result = rep.relclserror;
36858 0 : ae_frame_leave(_state);
36859 0 : return result;
36860 : }
36861 :
36862 :
36863 : /*************************************************************************
36864 : Average cross-entropy (in bits per element) on the test set
36865 :
36866 : INPUT PARAMETERS:
36867 : Ensemble- ensemble
36868 : XY - test set
36869 : NPoints - test set size
36870 :
36871 : RESULT:
36872 : CrossEntropy/(NPoints*LN(2)).
36873 :     Zero if the ensemble solves a regression task.
36874 :
36875 : -- ALGLIB --
36876 : Copyright 17.02.2009 by Bochkanov Sergey
36877 : *************************************************************************/
36878 0 : double mlpeavgce(mlpensemble* ensemble,
36879 : /* Real */ ae_matrix* xy,
36880 : ae_int_t npoints,
36881 : ae_state *_state)
36882 : {
36883 : ae_frame _frame_block;
36884 : modelerrors rep;
36885 : double result;
36886 :
36887 0 : ae_frame_make(_state, &_frame_block);
36888 0 : memset(&rep, 0, sizeof(rep));
36889 0 : _modelerrors_init(&rep, _state, ae_true);
36890 :
36891 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
36892 0 : result = rep.avgce;
36893 0 : ae_frame_leave(_state);
36894 0 : return result;
36895 : }
36896 :
36897 :
36898 : /*************************************************************************
36899 : RMS error on the test set
36900 :
36901 : INPUT PARAMETERS:
36902 : Ensemble- ensemble
36903 : XY - test set
36904 : NPoints - test set size
36905 :
36906 : RESULT:
36907 : root mean square error.
36908 :     Its meaning for regression tasks is obvious. For classification tasks,
36909 :     RMS error measures the error made when estimating posterior probabilities.
36910 :
36911 : -- ALGLIB --
36912 : Copyright 17.02.2009 by Bochkanov Sergey
36913 : *************************************************************************/
36914 0 : double mlpermserror(mlpensemble* ensemble,
36915 : /* Real */ ae_matrix* xy,
36916 : ae_int_t npoints,
36917 : ae_state *_state)
36918 : {
36919 : ae_frame _frame_block;
36920 : modelerrors rep;
36921 : double result;
36922 :
36923 0 : ae_frame_make(_state, &_frame_block);
36924 0 : memset(&rep, 0, sizeof(rep));
36925 0 : _modelerrors_init(&rep, _state, ae_true);
36926 :
36927 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
36928 0 : result = rep.rmserror;
36929 0 : ae_frame_leave(_state);
36930 0 : return result;
36931 : }
36932 :
36933 :
36934 : /*************************************************************************
36935 : Average error on the test set
36936 :
36937 : INPUT PARAMETERS:
36938 : Ensemble- ensemble
36939 : XY - test set
36940 : NPoints - test set size
36941 :
36942 : RESULT:
36943 :     Its meaning for regression tasks is obvious. For classification tasks,
36944 :     it means the average error made when estimating posterior probabilities.
36945 :
36946 : -- ALGLIB --
36947 : Copyright 17.02.2009 by Bochkanov Sergey
36948 : *************************************************************************/
36949 0 : double mlpeavgerror(mlpensemble* ensemble,
36950 : /* Real */ ae_matrix* xy,
36951 : ae_int_t npoints,
36952 : ae_state *_state)
36953 : {
36954 : ae_frame _frame_block;
36955 : modelerrors rep;
36956 : double result;
36957 :
36958 0 : ae_frame_make(_state, &_frame_block);
36959 0 : memset(&rep, 0, sizeof(rep));
36960 0 : _modelerrors_init(&rep, _state, ae_true);
36961 :
36962 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
36963 0 : result = rep.avgerror;
36964 0 : ae_frame_leave(_state);
36965 0 : return result;
36966 : }
36967 :
36968 :
36969 : /*************************************************************************
36970 : Average relative error on the test set
36971 :
36972 : INPUT PARAMETERS:
36973 : Ensemble- ensemble
36974 : XY - test set
36975 : NPoints - test set size
36976 :
36977 : RESULT:
36978 :     Its meaning for regression tasks is obvious. For classification tasks,
36979 :     it means the average relative error made when estimating posterior probabilities.
36980 :
36981 : -- ALGLIB --
36982 : Copyright 17.02.2009 by Bochkanov Sergey
36983 : *************************************************************************/
36984 0 : double mlpeavgrelerror(mlpensemble* ensemble,
36985 : /* Real */ ae_matrix* xy,
36986 : ae_int_t npoints,
36987 : ae_state *_state)
36988 : {
36989 : ae_frame _frame_block;
36990 : modelerrors rep;
36991 : double result;
36992 :
36993 0 : ae_frame_make(_state, &_frame_block);
36994 0 : memset(&rep, 0, sizeof(rep));
36995 0 : _modelerrors_init(&rep, _state, ae_true);
36996 :
36997 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &rep, _state);
36998 0 : result = rep.avgrelerror;
36999 0 : ae_frame_leave(_state);
37000 0 : return result;
37001 : }
37002 :
37003 :
37004 : /*************************************************************************
37005 : Serializer: allocation
37006 :
37007 : -- ALGLIB --
37008 : Copyright 19.10.2011 by Bochkanov Sergey
37009 : *************************************************************************/
37010 0 : void mlpealloc(ae_serializer* s, mlpensemble* ensemble, ae_state *_state)
37011 : {
37012 :
37013 :
37014 0 : ae_serializer_alloc_entry(s);
37015 0 : ae_serializer_alloc_entry(s);
37016 0 : ae_serializer_alloc_entry(s);
37017 0 : allocrealarray(s, &ensemble->weights, -1, _state);
37018 0 : allocrealarray(s, &ensemble->columnmeans, -1, _state);
37019 0 : allocrealarray(s, &ensemble->columnsigmas, -1, _state);
37020 0 : mlpalloc(s, &ensemble->network, _state);
37021 0 : }
37022 :
37023 :
37024 : /*************************************************************************
37025 : Serializer: serialization
37026 :
37027 : -- ALGLIB --
37028 : Copyright 14.03.2011 by Bochkanov Sergey
37029 : *************************************************************************/
37030 0 : void mlpeserialize(ae_serializer* s,
37031 : mlpensemble* ensemble,
37032 : ae_state *_state)
37033 : {
37034 :
37035 :
37036 0 : ae_serializer_serialize_int(s, getmlpeserializationcode(_state), _state);
37037 0 : ae_serializer_serialize_int(s, mlpe_mlpefirstversion, _state);
37038 0 : ae_serializer_serialize_int(s, ensemble->ensemblesize, _state);
37039 0 : serializerealarray(s, &ensemble->weights, -1, _state);
37040 0 : serializerealarray(s, &ensemble->columnmeans, -1, _state);
37041 0 : serializerealarray(s, &ensemble->columnsigmas, -1, _state);
37042 0 : mlpserialize(s, &ensemble->network, _state);
37043 0 : }
37044 :
37045 :
37046 : /*************************************************************************
37047 : Serializer: unserialization
37048 :
37049 : -- ALGLIB --
37050 : Copyright 14.03.2011 by Bochkanov Sergey
37051 : *************************************************************************/
37052 0 : void mlpeunserialize(ae_serializer* s,
37053 : mlpensemble* ensemble,
37054 : ae_state *_state)
37055 : {
37056 : ae_int_t i0;
37057 : ae_int_t i1;
37058 :
37059 0 : _mlpensemble_clear(ensemble);
37060 :
37061 :
37062 : /*
37063 : * check correctness of header
37064 : */
37065 0 : ae_serializer_unserialize_int(s, &i0, _state);
37066 0 : ae_assert(i0==getmlpeserializationcode(_state), "MLPEUnserialize: stream header corrupted", _state);
37067 0 : ae_serializer_unserialize_int(s, &i1, _state);
37068 0 : ae_assert(i1==mlpe_mlpefirstversion, "MLPEUnserialize: stream header corrupted", _state);
37069 :
37070 : /*
37071 : * Create network
37072 : */
37073 0 : ae_serializer_unserialize_int(s, &ensemble->ensemblesize, _state);
37074 0 : unserializerealarray(s, &ensemble->weights, _state);
37075 0 : unserializerealarray(s, &ensemble->columnmeans, _state);
37076 0 : unserializerealarray(s, &ensemble->columnsigmas, _state);
37077 0 : mlpunserialize(s, &ensemble->network, _state);
37078 :
37079 : /*
37080 :      * Allocate temporaries
37081 : */
37082 0 : ae_vector_set_length(&ensemble->y, mlpgetoutputscount(&ensemble->network, _state), _state);
37083 0 : }
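/*************************************************************************
Serialization round-trip sketch, assuming the string-based C++ wrappers
generated for this module (ensemble geometry hypothetical):

    std::string buf;
    alglib::mlpensemble src, dst;

    alglib::mlpecreate0(4, 1, 10, src);
    alglib::mlpeserialize(src, buf);    // header + weights + network
    alglib::mlpeunserialize(buf, dst);  // dst is now equivalent to src
*************************************************************************/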
37084 :
37085 :
37086 0 : void _mlpensemble_init(void* _p, ae_state *_state, ae_bool make_automatic)
37087 : {
37088 0 : mlpensemble *p = (mlpensemble*)_p;
37089 0 : ae_touch_ptr((void*)p);
37090 0 : ae_vector_init(&p->weights, 0, DT_REAL, _state, make_automatic);
37091 0 : ae_vector_init(&p->columnmeans, 0, DT_REAL, _state, make_automatic);
37092 0 : ae_vector_init(&p->columnsigmas, 0, DT_REAL, _state, make_automatic);
37093 0 : _multilayerperceptron_init(&p->network, _state, make_automatic);
37094 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
37095 0 : }
37096 :
37097 :
37098 0 : void _mlpensemble_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
37099 : {
37100 0 : mlpensemble *dst = (mlpensemble*)_dst;
37101 0 : mlpensemble *src = (mlpensemble*)_src;
37102 0 : dst->ensemblesize = src->ensemblesize;
37103 0 : ae_vector_init_copy(&dst->weights, &src->weights, _state, make_automatic);
37104 0 : ae_vector_init_copy(&dst->columnmeans, &src->columnmeans, _state, make_automatic);
37105 0 : ae_vector_init_copy(&dst->columnsigmas, &src->columnsigmas, _state, make_automatic);
37106 0 : _multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
37107 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
37108 0 : }
37109 :
37110 :
37111 0 : void _mlpensemble_clear(void* _p)
37112 : {
37113 0 : mlpensemble *p = (mlpensemble*)_p;
37114 0 : ae_touch_ptr((void*)p);
37115 0 : ae_vector_clear(&p->weights);
37116 0 : ae_vector_clear(&p->columnmeans);
37117 0 : ae_vector_clear(&p->columnsigmas);
37118 0 : _multilayerperceptron_clear(&p->network);
37119 0 : ae_vector_clear(&p->y);
37120 0 : }
37121 :
37122 :
37123 0 : void _mlpensemble_destroy(void* _p)
37124 : {
37125 0 : mlpensemble *p = (mlpensemble*)_p;
37126 0 : ae_touch_ptr((void*)p);
37127 0 : ae_vector_destroy(&p->weights);
37128 0 : ae_vector_destroy(&p->columnmeans);
37129 0 : ae_vector_destroy(&p->columnsigmas);
37130 0 : _multilayerperceptron_destroy(&p->network);
37131 0 : ae_vector_destroy(&p->y);
37132 0 : }
37133 :
37134 :
37135 : #endif
37136 : #if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
37137 :
37138 :
37139 : /*************************************************************************
37140 : Neural network training using modified Levenberg-Marquardt with exact
37141 : Hessian calculation and regularization. The subroutine trains the neural
37142 : network with restarts from random positions. The algorithm is well suited
37143 : to small- and medium-scale problems (hundreds of weights).
37144 :
37145 : INPUT PARAMETERS:
37146 : Network - neural network with initialized geometry
37147 : XY - training set
37148 : NPoints - training set size
37149 : Decay - weight decay constant, >=0.001
37150 : Decay term 'Decay*||Weights||^2' is added to error
37151 : function.
37152 : If you don't know what Decay to choose, use 0.001.
37153 : Restarts - number of restarts from random position, >0.
37154 : If you don't know what Restarts to choose, use 2.
37155 :
37156 : OUTPUT PARAMETERS:
37157 : Network - trained neural network.
37158 : Info - return code:
37159 : * -9, if internal matrix inverse subroutine failed
37160 : * -2, if there is a point with class number
37161 : outside of [0..NOut-1].
37162 : * -1, if wrong parameters specified
37163 :                     (NPoints<1, Restarts<1).
37164 : * 2, if task has been solved.
37165 : Rep - training report
37166 :
37167 : -- ALGLIB --
37168 : Copyright 10.03.2009 by Bochkanov Sergey
37169 : *************************************************************************/
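/*************************************************************************
Training sketch, assuming the C++ wrappers (dataset, decay and restart
counts hypothetical; rows are [in1,in2,out]):

    alglib::multilayerperceptron net;
    alglib::mlpreport rep;
    alglib::ae_int_t info;
    alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]"; // XOR
    alglib::mlpcreate1(2, 3, 1, net);
    alglib::mlptrainlm(net, xy, 4, 0.001, 2, info, rep); // info==2 on success
*************************************************************************/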
37170 0 : void mlptrainlm(multilayerperceptron* network,
37171 : /* Real */ ae_matrix* xy,
37172 : ae_int_t npoints,
37173 : double decay,
37174 : ae_int_t restarts,
37175 : ae_int_t* info,
37176 : mlpreport* rep,
37177 : ae_state *_state)
37178 : {
37179 : ae_frame _frame_block;
37180 : ae_int_t nin;
37181 : ae_int_t nout;
37182 : ae_int_t wcount;
37183 : double lmsteptol;
37184 : ae_int_t i;
37185 : ae_int_t k;
37186 : double v;
37187 : double e;
37188 : double enew;
37189 : double xnorm2;
37190 : double stepnorm;
37191 : ae_vector g;
37192 : ae_vector d;
37193 : ae_matrix h;
37194 : ae_matrix hmod;
37195 : ae_matrix z;
37196 : ae_bool spd;
37197 : double nu;
37198 : double lambdav;
37199 : double lambdaup;
37200 : double lambdadown;
37201 : minlbfgsreport internalrep;
37202 : minlbfgsstate state;
37203 : ae_vector x;
37204 : ae_vector y;
37205 : ae_vector wbase;
37206 : ae_vector wdir;
37207 : ae_vector wt;
37208 : ae_vector wx;
37209 : ae_int_t pass;
37210 : ae_vector wbest;
37211 : double ebest;
37212 : ae_int_t invinfo;
37213 : matinvreport invrep;
37214 : ae_int_t solverinfo;
37215 : densesolverreport solverrep;
37216 :
37217 0 : ae_frame_make(_state, &_frame_block);
37218 0 : memset(&g, 0, sizeof(g));
37219 0 : memset(&d, 0, sizeof(d));
37220 0 : memset(&h, 0, sizeof(h));
37221 0 : memset(&hmod, 0, sizeof(hmod));
37222 0 : memset(&z, 0, sizeof(z));
37223 0 : memset(&internalrep, 0, sizeof(internalrep));
37224 0 : memset(&state, 0, sizeof(state));
37225 0 : memset(&x, 0, sizeof(x));
37226 0 : memset(&y, 0, sizeof(y));
37227 0 : memset(&wbase, 0, sizeof(wbase));
37228 0 : memset(&wdir, 0, sizeof(wdir));
37229 0 : memset(&wt, 0, sizeof(wt));
37230 0 : memset(&wx, 0, sizeof(wx));
37231 0 : memset(&wbest, 0, sizeof(wbest));
37232 0 : memset(&invrep, 0, sizeof(invrep));
37233 0 : memset(&solverrep, 0, sizeof(solverrep));
37234 0 : *info = 0;
37235 0 : _mlpreport_clear(rep);
37236 0 : ae_vector_init(&g, 0, DT_REAL, _state, ae_true);
37237 0 : ae_vector_init(&d, 0, DT_REAL, _state, ae_true);
37238 0 : ae_matrix_init(&h, 0, 0, DT_REAL, _state, ae_true);
37239 0 : ae_matrix_init(&hmod, 0, 0, DT_REAL, _state, ae_true);
37240 0 : ae_matrix_init(&z, 0, 0, DT_REAL, _state, ae_true);
37241 0 : _minlbfgsreport_init(&internalrep, _state, ae_true);
37242 0 : _minlbfgsstate_init(&state, _state, ae_true);
37243 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
37244 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
37245 0 : ae_vector_init(&wbase, 0, DT_REAL, _state, ae_true);
37246 0 : ae_vector_init(&wdir, 0, DT_REAL, _state, ae_true);
37247 0 : ae_vector_init(&wt, 0, DT_REAL, _state, ae_true);
37248 0 : ae_vector_init(&wx, 0, DT_REAL, _state, ae_true);
37249 0 : ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
37250 0 : _matinvreport_init(&invrep, _state, ae_true);
37251 0 : _densesolverreport_init(&solverrep, _state, ae_true);
37252 :
37253 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
37254 0 : lambdaup = (double)(10);
37255 0 : lambdadown = 0.3;
37256 0 : lmsteptol = 0.001;
37257 :
37258 : /*
37259 : * Test for inputs
37260 : */
37261 0 : if( npoints<=0||restarts<1 )
37262 : {
37263 0 : *info = -1;
37264 0 : ae_frame_leave(_state);
37265 0 : return;
37266 : }
37267 0 : if( mlpissoftmax(network, _state) )
37268 : {
37269 0 : for(i=0; i<=npoints-1; i++)
37270 : {
37271 0 : if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
37272 : {
37273 0 : *info = -2;
37274 0 : ae_frame_leave(_state);
37275 0 : return;
37276 : }
37277 : }
37278 : }
37279 0 : decay = ae_maxreal(decay, mlptrain_mindecay, _state);
37280 0 : *info = 2;
37281 :
37282 : /*
37283 : * Initialize data
37284 : */
37285 0 : rep->ngrad = 0;
37286 0 : rep->nhess = 0;
37287 0 : rep->ncholesky = 0;
37288 :
37289 : /*
37290 : * General case.
37291 : * Prepare task and network. Allocate space.
37292 : */
37293 0 : mlpinitpreprocessor(network, xy, npoints, _state);
37294 0 : ae_vector_set_length(&g, wcount-1+1, _state);
37295 0 : ae_matrix_set_length(&h, wcount-1+1, wcount-1+1, _state);
37296 0 : ae_matrix_set_length(&hmod, wcount-1+1, wcount-1+1, _state);
37297 0 : ae_vector_set_length(&wbase, wcount-1+1, _state);
37298 0 : ae_vector_set_length(&wdir, wcount-1+1, _state);
37299 0 : ae_vector_set_length(&wbest, wcount-1+1, _state);
37300 0 : ae_vector_set_length(&wt, wcount-1+1, _state);
37301 0 : ae_vector_set_length(&wx, wcount-1+1, _state);
37302 0 : ebest = ae_maxrealnumber;
37303 :
37304 : /*
37305 : * Multiple passes
37306 : */
37307 0 : for(pass=1; pass<=restarts; pass++)
37308 : {
37309 :
37310 : /*
37311 : * Initialize weights
37312 : */
37313 0 : mlprandomize(network, _state);
37314 :
37315 : /*
37316 : * First stage of the hybrid algorithm: LBFGS
37317 : */
37318 0 : ae_v_move(&wbase.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37319 0 : minlbfgscreate(wcount, ae_minint(wcount, 5, _state), &wbase, &state, _state);
37320 0 : minlbfgssetcond(&state, (double)(0), (double)(0), (double)(0), ae_maxint(25, wcount, _state), _state);
37321 0 : while(minlbfgsiteration(&state, _state))
37322 : {
37323 :
37324 : /*
37325 : * gradient
37326 : */
37327 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37328 0 : mlpgradbatch(network, xy, npoints, &state.f, &state.g, _state);
37329 :
37330 : /*
37331 : * weight decay
37332 : */
37333 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37334 0 : state.f = state.f+0.5*decay*v;
37335 0 : ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
37336 :
37337 : /*
37338 : * next iteration
37339 : */
37340 0 : rep->ngrad = rep->ngrad+1;
37341 : }
37342 0 : minlbfgsresults(&state, &wbase, &internalrep, _state);
37343 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &wbase.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37344 :
37345 : /*
37346 : * Second stage of the hybrid algorithm: LM
37347 : *
37348 : * Initialize H with identity matrix,
37349 : * G with gradient,
37350 : * E with regularized error.
37351 : */
37352 0 : mlphessianbatch(network, xy, npoints, &e, &g, &h, _state);
37353 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37354 0 : e = e+0.5*decay*v;
37355 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
37356 0 : for(k=0; k<=wcount-1; k++)
37357 : {
37358 0 : h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
37359 : }
37360 0 : rep->nhess = rep->nhess+1;
37361 0 : lambdav = 0.001;
37362 0 : nu = (double)(2);
37363 : for(;;)
37364 : {
37365 :
37366 : /*
37367 : * 1. HMod = H+lambda*I
37368 : * 2. Try to solve (H+Lambda*I)*dx = -g.
37369 : * Increase lambda if left part is not positive definite.
37370 : */
37371 0 : for(i=0; i<=wcount-1; i++)
37372 : {
37373 0 : ae_v_move(&hmod.ptr.pp_double[i][0], 1, &h.ptr.pp_double[i][0], 1, ae_v_len(0,wcount-1));
37374 0 : hmod.ptr.pp_double[i][i] = hmod.ptr.pp_double[i][i]+lambdav;
37375 : }
37376 0 : spd = spdmatrixcholesky(&hmod, wcount, ae_true, _state);
37377 0 : rep->ncholesky = rep->ncholesky+1;
37378 0 : if( !spd )
37379 : {
37380 0 : lambdav = lambdav*lambdaup*nu;
37381 0 : nu = nu*2;
37382 0 : continue;
37383 : }
37384 0 : spdmatrixcholeskysolve(&hmod, wcount, ae_true, &g, &solverinfo, &solverrep, &wdir, _state);
37385 0 : if( solverinfo<0 )
37386 : {
37387 0 : lambdav = lambdav*lambdaup*nu;
37388 0 : nu = nu*2;
37389 0 : continue;
37390 : }
37391 0 : ae_v_muld(&wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1), -1);
37392 :
37393 : /*
37394 : * Lambda found.
37395 : * 1. Save old w in WBase
37396 : * 2. Test stopping criteria
37397 : * 3. If error(w+wdir)>error(w), increase lambda
37398 : */
37399 0 : ae_v_add(&network->weights.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37400 0 : xnorm2 = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37401 0 : stepnorm = ae_v_dotproduct(&wdir.ptr.p_double[0], 1, &wdir.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37402 0 : stepnorm = ae_sqrt(stepnorm, _state);
37403 0 : enew = mlperror(network, xy, npoints, _state)+0.5*decay*xnorm2;
37404 0 : if( ae_fp_less(stepnorm,lmsteptol*(1+ae_sqrt(xnorm2, _state))) )
37405 : {
37406 0 : break;
37407 : }
37408 0 : if( ae_fp_greater(enew,e) )
37409 : {
37410 0 : lambdav = lambdav*lambdaup*nu;
37411 0 : nu = nu*2;
37412 0 : continue;
37413 : }
37414 :
37415 : /*
37416 : * Optimize using inv(cholesky(H)) as preconditioner
37417 : */
37418 0 : rmatrixtrinverse(&hmod, wcount, ae_true, ae_false, &invinfo, &invrep, _state);
37419 0 : if( invinfo<=0 )
37420 : {
37421 :
37422 : /*
37423 : * if matrix can't be inverted then exit with errors
37424 : * TODO: make WCount steps in direction suggested by HMod
37425 : */
37426 0 : *info = -9;
37427 0 : ae_frame_leave(_state);
37428 0 : return;
37429 : }
37430 0 : ae_v_move(&wbase.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37431 0 : for(i=0; i<=wcount-1; i++)
37432 : {
37433 0 : wt.ptr.p_double[i] = (double)(0);
37434 : }
37435 0 : minlbfgscreatex(wcount, wcount, &wt, 1, 0.0, &state, _state);
37436 0 : minlbfgssetcond(&state, (double)(0), (double)(0), (double)(0), 5, _state);
37437 0 : while(minlbfgsiteration(&state, _state))
37438 : {
37439 :
37440 : /*
37441 : * gradient
37442 : */
37443 0 : for(i=0; i<=wcount-1; i++)
37444 : {
37445 0 : v = ae_v_dotproduct(&state.x.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1));
37446 0 : network->weights.ptr.p_double[i] = wbase.ptr.p_double[i]+v;
37447 : }
37448 0 : mlpgradbatch(network, xy, npoints, &state.f, &g, _state);
37449 0 : for(i=0; i<=wcount-1; i++)
37450 : {
37451 0 : state.g.ptr.p_double[i] = (double)(0);
37452 : }
37453 0 : for(i=0; i<=wcount-1; i++)
37454 : {
37455 0 : v = g.ptr.p_double[i];
37456 0 : ae_v_addd(&state.g.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1), v);
37457 : }
37458 :
37459 : /*
37460 : * weight decay
37461 : * grad(x'*x) = A'*(x0+A*t)
37462 : */
37463 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37464 0 : state.f = state.f+0.5*decay*v;
37465 0 : for(i=0; i<=wcount-1; i++)
37466 : {
37467 0 : v = decay*network->weights.ptr.p_double[i];
37468 0 : ae_v_addd(&state.g.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1), v);
37469 : }
37470 :
37471 : /*
37472 : * next iteration
37473 : */
37474 0 : rep->ngrad = rep->ngrad+1;
37475 : }
37476 0 : minlbfgsresults(&state, &wt, &internalrep, _state);
37477 :
37478 : /*
37479 : * Accept new position.
37480 : * Calculate Hessian
37481 : */
37482 0 : for(i=0; i<=wcount-1; i++)
37483 : {
37484 0 : v = ae_v_dotproduct(&wt.ptr.p_double[i], 1, &hmod.ptr.pp_double[i][i], 1, ae_v_len(i,wcount-1));
37485 0 : network->weights.ptr.p_double[i] = wbase.ptr.p_double[i]+v;
37486 : }
37487 0 : mlphessianbatch(network, xy, npoints, &e, &g, &h, _state);
37488 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37489 0 : e = e+0.5*decay*v;
37490 0 : ae_v_addd(&g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
37491 0 : for(k=0; k<=wcount-1; k++)
37492 : {
37493 0 : h.ptr.pp_double[k][k] = h.ptr.pp_double[k][k]+decay;
37494 : }
37495 0 : rep->nhess = rep->nhess+1;
37496 :
37497 : /*
37498 : * Update lambda
37499 : */
37500 0 : lambdav = lambdav*lambdadown;
37501 0 : nu = (double)(2);
37502 : }
37503 :
37504 : /*
37505 : * update WBest
37506 : */
37507 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37508 0 : e = 0.5*decay*v+mlperror(network, xy, npoints, _state);
37509 0 : if( ae_fp_less(e,ebest) )
37510 : {
37511 0 : ebest = e;
37512 0 : ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37513 : }
37514 : }
37515 :
37516 : /*
37517 : * copy WBest to output
37518 : */
37519 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37520 0 : ae_frame_leave(_state);
37521 : }
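/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
A minimal call of MLPTrainLM through the public C++ wrapper declared in
dataanalysis.h; the toy dataset, network geometry and variable names are
illustrative assumptions only.

    #include "dataanalysis.h"

    int main()
    {
        // 4 samples in regression format: 2 inputs + 1 output per row
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);    // 2 inputs, 3 hidden, 1 output
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlptrainlm(net, xy, 4, 0.001, 2, info, rep); // Decay=0.001, 2 restarts
        return info==2 ? 0 : 1;              // Info=2 means "task solved"
    }
*************************************************************************/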
37522 :
37523 :
37524 : /*************************************************************************
37525 : Neural network training using L-BFGS algorithm with regularization.
37526 : Subroutine trains neural network with restarts from random positions.
37527 : Algorithm is well suited for problems of any dimensionality (memory
37528 : requirements and step complexity are linear by weights number).
37529 :
37530 : INPUT PARAMETERS:
37531 : Network - neural network with initialized geometry
37532 : XY - training set
37533 : NPoints - training set size
37534 : Decay - weight decay constant, >=0.001
37535 : Decay term 'Decay*||Weights||^2' is added to error
37536 : function.
37537 : If you don't know what Decay to choose, use 0.001.
37538 : Restarts - number of restarts from random position, >0.
37539 : If you don't know what Restarts to choose, use 2.
37540 : WStep - stopping criterion. Algorithm stops if step size is
37541 : less than WStep. Recommended value - 0.01. Zero step
37542 : size means stopping after MaxIts iterations.
37543 : MaxIts - stopping criterion. Algorithm stops after MaxIts
37544 : iterations (NOT gradient calculations). Zero MaxIts
37545 : means stopping when step is sufficiently small.
37546 :
37547 : OUTPUT PARAMETERS:
37548 : Network - trained neural network.
37549 : Info - return code:
37550 : * -8, if both WStep=0 and MaxIts=0
37551 : * -2, if there is a point with class number
37552 : outside of [0..NOut-1].
37553 : * -1, if wrong parameters specified
37554 : (NPoints<1, Restarts<1).
37555 : * 2, if task has been solved.
37556 : Rep - training report
37557 :
37558 : -- ALGLIB --
37559 : Copyright 09.12.2007 by Bochkanov Sergey
37560 : *************************************************************************/
37561 0 : void mlptrainlbfgs(multilayerperceptron* network,
37562 : /* Real */ ae_matrix* xy,
37563 : ae_int_t npoints,
37564 : double decay,
37565 : ae_int_t restarts,
37566 : double wstep,
37567 : ae_int_t maxits,
37568 : ae_int_t* info,
37569 : mlpreport* rep,
37570 : ae_state *_state)
37571 : {
37572 : ae_frame _frame_block;
37573 : ae_int_t i;
37574 : ae_int_t pass;
37575 : ae_int_t nin;
37576 : ae_int_t nout;
37577 : ae_int_t wcount;
37578 : ae_vector w;
37579 : ae_vector wbest;
37580 : double e;
37581 : double v;
37582 : double ebest;
37583 : minlbfgsreport internalrep;
37584 : minlbfgsstate state;
37585 :
37586 0 : ae_frame_make(_state, &_frame_block);
37587 0 : memset(&w, 0, sizeof(w));
37588 0 : memset(&wbest, 0, sizeof(wbest));
37589 0 : memset(&internalrep, 0, sizeof(internalrep));
37590 0 : memset(&state, 0, sizeof(state));
37591 0 : *info = 0;
37592 0 : _mlpreport_clear(rep);
37593 0 : ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
37594 0 : ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
37595 0 : _minlbfgsreport_init(&internalrep, _state, ae_true);
37596 0 : _minlbfgsstate_init(&state, _state, ae_true);
37597 :
37598 :
37599 : /*
37600 : * Test inputs, parse flags, read network geometry
37601 : */
37602 0 : if( ae_fp_eq(wstep,(double)(0))&&maxits==0 )
37603 : {
37604 0 : *info = -8;
37605 0 : ae_frame_leave(_state);
37606 0 : return;
37607 : }
37608 0 : if( ((npoints<=0||restarts<1)||ae_fp_less(wstep,(double)(0)))||maxits<0 )
37609 : {
37610 0 : *info = -1;
37611 0 : ae_frame_leave(_state);
37612 0 : return;
37613 : }
37614 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
37615 0 : if( mlpissoftmax(network, _state) )
37616 : {
37617 0 : for(i=0; i<=npoints-1; i++)
37618 : {
37619 0 : if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
37620 : {
37621 0 : *info = -2;
37622 0 : ae_frame_leave(_state);
37623 0 : return;
37624 : }
37625 : }
37626 : }
37627 0 : decay = ae_maxreal(decay, mlptrain_mindecay, _state);
37628 0 : *info = 2;
37629 :
37630 : /*
37631 : * Prepare
37632 : */
37633 0 : mlpinitpreprocessor(network, xy, npoints, _state);
37634 0 : ae_vector_set_length(&w, wcount-1+1, _state);
37635 0 : ae_vector_set_length(&wbest, wcount-1+1, _state);
37636 0 : ebest = ae_maxrealnumber;
37637 :
37638 : /*
37639 : * Multiple starts
37640 : */
37641 0 : rep->ncholesky = 0;
37642 0 : rep->nhess = 0;
37643 0 : rep->ngrad = 0;
37644 0 : for(pass=1; pass<=restarts; pass++)
37645 : {
37646 :
37647 : /*
37648 : * Process
37649 : */
37650 0 : mlprandomize(network, _state);
37651 0 : ae_v_move(&w.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37652 0 : minlbfgscreate(wcount, ae_minint(wcount, 10, _state), &w, &state, _state);
37653 0 : minlbfgssetcond(&state, 0.0, 0.0, wstep, maxits, _state);
37654 0 : while(minlbfgsiteration(&state, _state))
37655 : {
37656 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37657 0 : mlpgradnbatch(network, xy, npoints, &state.f, &state.g, _state);
37658 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37659 0 : state.f = state.f+0.5*decay*v;
37660 0 : ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
37661 0 : rep->ngrad = rep->ngrad+1;
37662 : }
37663 0 : minlbfgsresults(&state, &w, &internalrep, _state);
37664 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &w.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37665 :
37666 : /*
37667 : * Compare with best
37668 : */
37669 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37670 0 : e = mlperrorn(network, xy, npoints, _state)+0.5*decay*v;
37671 0 : if( ae_fp_less(e,ebest) )
37672 : {
37673 0 : ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37674 0 : ebest = e;
37675 : }
37676 : }
37677 :
37678 : /*
37679 : * The best network
37680 : */
37681 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37682 0 : ae_frame_leave(_state);
37683 : }
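/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
A minimal call of MLPTrainLBFGS through the C++ wrapper declared in
dataanalysis.h; dataset, geometry and names are illustrative assumptions.

    #include "dataanalysis.h"

    int main()
    {
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        // Decay=0.001, 2 restarts, WStep=0.01, MaxIts=0 (stop on small step)
        alglib::mlptrainlbfgs(net, xy, 4, 0.001, 2, 0.01, 0, info, rep);
        return info==2 ? 0 : 1;
    }
*************************************************************************/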
37684 :
37685 :
37686 : /*************************************************************************
37687 : Neural network training using early stopping (base algorithm - L-BFGS with
37688 : regularization).
37689 :
37690 : INPUT PARAMETERS:
37691 : Network - neural network with initialized geometry
37692 : TrnXY - training set
37693 : TrnSize - training set size, TrnSize>0
37694 : ValXY - validation set
37695 : ValSize - validation set size, ValSize>0
37696 : Decay - weight decay constant, >=0.001
37697 : Decay term 'Decay*||Weights||^2' is added to error
37698 : function.
37699 : If you don't know what Decay to choose, use 0.001.
37700 : Restarts - number of restarts, either:
37701 : * strictly positive number - algorithm makes the
37702 : specified number of restarts from random positions.
37703 : * -1, in which case algorithm makes exactly one run
37704 : from the initial state of the network (no randomization).
37705 : If you don't know what Restarts to choose, choose one
37706 : of the following:
37707 : * -1 (deterministic start)
37708 : * +1 (one random restart)
37709 : * +5 (moderate amount of random restarts)
37710 :
37711 : OUTPUT PARAMETERS:
37712 : Network - trained neural network.
37713 : Info - return code:
37714 : * -2, if there is a point with class number
37715 : outside of [0..NOut-1].
37716 : * -1, if wrong parameters specified
37717 : (NPoints<0, Restarts<1, ...).
37718 : * 2, task has been solved, stopping criterion met -
37719 : sufficiently small step size. Not expected (we
37720 : use EARLY stopping) but possible and not an
37721 : error.
37722 : * 6, task has been solved, stopping criterion met -
37723 : increasing of validation set error.
37724 : Rep - training report
37725 :
37726 : NOTE:
37727 :
37728 : Algorithm stops if validation set error increases for long enough or if
37729 : step size becomes small enough (there are tasks where validation set
37730 : error may decrease indefinitely). In any case, the solution returned
37731 : corresponds to the minimum of validation set error.
37732 :
37733 : -- ALGLIB --
37734 : Copyright 10.03.2009 by Bochkanov Sergey
37735 : *************************************************************************/
37736 0 : void mlptraines(multilayerperceptron* network,
37737 : /* Real */ ae_matrix* trnxy,
37738 : ae_int_t trnsize,
37739 : /* Real */ ae_matrix* valxy,
37740 : ae_int_t valsize,
37741 : double decay,
37742 : ae_int_t restarts,
37743 : ae_int_t* info,
37744 : mlpreport* rep,
37745 : ae_state *_state)
37746 : {
37747 : ae_frame _frame_block;
37748 : ae_int_t i;
37749 : ae_int_t pass;
37750 : ae_int_t nin;
37751 : ae_int_t nout;
37752 : ae_int_t wcount;
37753 : ae_vector w;
37754 : ae_vector wbest;
37755 : double e;
37756 : double v;
37757 : double ebest;
37758 : ae_vector wfinal;
37759 : double efinal;
37760 : ae_int_t itcnt;
37761 : ae_int_t itbest;
37762 : minlbfgsreport internalrep;
37763 : minlbfgsstate state;
37764 : double wstep;
37765 : ae_bool needrandomization;
37766 :
37767 0 : ae_frame_make(_state, &_frame_block);
37768 0 : memset(&w, 0, sizeof(w));
37769 0 : memset(&wbest, 0, sizeof(wbest));
37770 0 : memset(&wfinal, 0, sizeof(wfinal));
37771 0 : memset(&internalrep, 0, sizeof(internalrep));
37772 0 : memset(&state, 0, sizeof(state));
37773 0 : *info = 0;
37774 0 : _mlpreport_clear(rep);
37775 0 : ae_vector_init(&w, 0, DT_REAL, _state, ae_true);
37776 0 : ae_vector_init(&wbest, 0, DT_REAL, _state, ae_true);
37777 0 : ae_vector_init(&wfinal, 0, DT_REAL, _state, ae_true);
37778 0 : _minlbfgsreport_init(&internalrep, _state, ae_true);
37779 0 : _minlbfgsstate_init(&state, _state, ae_true);
37780 :
37781 0 : wstep = 0.001;
37782 :
37783 : /*
37784 : * Test inputs, parse flags, read network geometry
37785 : */
37786 0 : if( ((trnsize<=0||valsize<=0)||(restarts<1&&restarts!=-1))||ae_fp_less(decay,(double)(0)) )
37787 : {
37788 0 : *info = -1;
37789 0 : ae_frame_leave(_state);
37790 0 : return;
37791 : }
37792 0 : if( restarts==-1 )
37793 : {
37794 0 : needrandomization = ae_false;
37795 0 : restarts = 1;
37796 : }
37797 : else
37798 : {
37799 0 : needrandomization = ae_true;
37800 : }
37801 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
37802 0 : if( mlpissoftmax(network, _state) )
37803 : {
37804 0 : for(i=0; i<=trnsize-1; i++)
37805 : {
37806 0 : if( ae_round(trnxy->ptr.pp_double[i][nin], _state)<0||ae_round(trnxy->ptr.pp_double[i][nin], _state)>=nout )
37807 : {
37808 0 : *info = -2;
37809 0 : ae_frame_leave(_state);
37810 0 : return;
37811 : }
37812 : }
37813 0 : for(i=0; i<=valsize-1; i++)
37814 : {
37815 0 : if( ae_round(valxy->ptr.pp_double[i][nin], _state)<0||ae_round(valxy->ptr.pp_double[i][nin], _state)>=nout )
37816 : {
37817 0 : *info = -2;
37818 0 : ae_frame_leave(_state);
37819 0 : return;
37820 : }
37821 : }
37822 : }
37823 0 : *info = 2;
37824 :
37825 : /*
37826 : * Prepare
37827 : */
37828 0 : mlpinitpreprocessor(network, trnxy, trnsize, _state);
37829 0 : ae_vector_set_length(&w, wcount-1+1, _state);
37830 0 : ae_vector_set_length(&wbest, wcount-1+1, _state);
37831 0 : ae_vector_set_length(&wfinal, wcount-1+1, _state);
37832 0 : efinal = ae_maxrealnumber;
37833 0 : for(i=0; i<=wcount-1; i++)
37834 : {
37835 0 : wfinal.ptr.p_double[i] = (double)(0);
37836 : }
37837 :
37838 : /*
37839 : * Multiple starts
37840 : */
37841 0 : rep->ncholesky = 0;
37842 0 : rep->nhess = 0;
37843 0 : rep->ngrad = 0;
37844 0 : for(pass=1; pass<=restarts; pass++)
37845 : {
37846 :
37847 : /*
37848 : * Process
37849 : */
37850 0 : if( needrandomization )
37851 : {
37852 0 : mlprandomize(network, _state);
37853 : }
37854 0 : ebest = mlperror(network, valxy, valsize, _state);
37855 0 : ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37856 0 : itbest = 0;
37857 0 : itcnt = 0;
37858 0 : ae_v_move(&w.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37859 0 : minlbfgscreate(wcount, ae_minint(wcount, 10, _state), &w, &state, _state);
37860 0 : minlbfgssetcond(&state, 0.0, 0.0, wstep, 0, _state);
37861 0 : minlbfgssetxrep(&state, ae_true, _state);
37862 0 : while(minlbfgsiteration(&state, _state))
37863 : {
37864 :
37865 : /*
37866 : * Calculate gradient
37867 : */
37868 0 : if( state.needfg )
37869 : {
37870 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37871 0 : mlpgradnbatch(network, trnxy, trnsize, &state.f, &state.g, _state);
37872 0 : v = ae_v_dotproduct(&network->weights.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37873 0 : state.f = state.f+0.5*decay*v;
37874 0 : ae_v_addd(&state.g.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
37875 0 : rep->ngrad = rep->ngrad+1;
37876 : }
37877 :
37878 : /*
37879 : * Validation set
37880 : */
37881 0 : if( state.xupdated )
37882 : {
37883 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &state.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37884 0 : e = mlperror(network, valxy, valsize, _state);
37885 0 : if( ae_fp_less(e,ebest) )
37886 : {
37887 0 : ebest = e;
37888 0 : ae_v_move(&wbest.ptr.p_double[0], 1, &network->weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37889 0 : itbest = itcnt;
37890 : }
37891 0 : if( itcnt>30&&ae_fp_greater((double)(itcnt),1.5*itbest) )
37892 : {
37893 0 : *info = 6;
37894 0 : break;
37895 : }
37896 0 : itcnt = itcnt+1;
37897 : }
37898 : }
37899 0 : minlbfgsresults(&state, &w, &internalrep, _state);
37900 :
37901 : /*
37902 : * Compare with final answer
37903 : */
37904 0 : if( ae_fp_less(ebest,efinal) )
37905 : {
37906 0 : ae_v_move(&wfinal.ptr.p_double[0], 1, &wbest.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37907 0 : efinal = ebest;
37908 : }
37909 : }
37910 :
37911 : /*
37912 : * The best network
37913 : */
37914 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &wfinal.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
37915 0 : ae_frame_leave(_state);
37916 : }
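/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Early-stopping training via MLPTrainES needs separate training and
validation sets; the tiny sets below are illustrative assumptions.

    #include "dataanalysis.h"

    int main()
    {
        alglib::real_2d_array trnxy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::real_2d_array valxy = "[[0.1,0.9,1],[0.9,0.9,0]]";
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlptraines(net, trnxy, 4, valxy, 2, 0.001, 3, info, rep);
        // Info=6: stopped on rising validation error; Info=2: small step
        return (info==6||info==2) ? 0 : 1;
    }
*************************************************************************/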
37917 :
37918 :
37919 : /*************************************************************************
37920 : Cross-validation estimate of generalization error.
37921 :
37922 : Base algorithm - L-BFGS.
37923 :
37924 : INPUT PARAMETERS:
37925 : Network - neural network with initialized geometry. Network is
37926 : not changed during cross-validation - it is used only
37927 : as a representative of its architecture.
37928 : XY - training set.
37929 : NPoints - training set size
37930 : Decay - weight decay, same as in MLPTrainLBFGS
37931 : Restarts - number of restarts, >0.
37932 : restarts are counted for each partition separately, so
37933 : total number of restarts will be Restarts*FoldsCount.
37934 : WStep - stopping criterion, same as in MLPTrainLBFGS
37935 : MaxIts - stopping criterion, same as in MLPTrainLBFGS
37936 : FoldsCount - number of folds in k-fold cross-validation,
37937 : 2<=FoldsCount<=NPoints.
37938 : recommended value: 10.
37939 :
37940 : OUTPUT PARAMETERS:
37941 : Info - return code, same as in MLPTrainLBFGS
37942 : Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
37943 : CVRep - generalization error estimates
37944 :
37945 : -- ALGLIB --
37946 : Copyright 09.12.2007 by Bochkanov Sergey
37947 : *************************************************************************/
37948 0 : void mlpkfoldcvlbfgs(multilayerperceptron* network,
37949 : /* Real */ ae_matrix* xy,
37950 : ae_int_t npoints,
37951 : double decay,
37952 : ae_int_t restarts,
37953 : double wstep,
37954 : ae_int_t maxits,
37955 : ae_int_t foldscount,
37956 : ae_int_t* info,
37957 : mlpreport* rep,
37958 : mlpcvreport* cvrep,
37959 : ae_state *_state)
37960 : {
37961 :
37962 0 : *info = 0;
37963 0 : _mlpreport_clear(rep);
37964 0 : _mlpcvreport_clear(cvrep);
37965 :
37966 0 : mlptrain_mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, ae_false, wstep, maxits, info, rep, cvrep, _state);
37967 0 : }
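/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
K-fold cross-validation with the L-BFGS trainer; the dataset and the
leave-one-out fold count are illustrative assumptions.

    #include "dataanalysis.h"

    int main()
    {
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);  // used only as architecture template
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlpcvreport cvrep;
        alglib::mlpkfoldcvlbfgs(net, xy, 4, 0.001, 2, 0.01, 0, 4, info, rep, cvrep);
        // cvrep.rmserror / cvrep.avgerror hold the generalization estimates
        return 0;
    }
*************************************************************************/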
37968 :
37969 :
37970 : /*************************************************************************
37971 : Cross-validation estimate of generalization error.
37972 :
37973 : Base algorithm - Levenberg-Marquardt.
37974 :
37975 : INPUT PARAMETERS:
37976 : Network - neural network with initialized geometry. Network is
37977 : not changed during cross-validation - it is used only
37978 : as a representative of its architecture.
37979 : XY - training set.
37980 : NPoints - training set size
37981 : Decay - weight decay, same as in MLPTrainLBFGS
37982 : Restarts - number of restarts, >0.
37983 : restarts are counted for each partition separately, so
37984 : total number of restarts will be Restarts*FoldsCount.
37985 : FoldsCount - number of folds in k-fold cross-validation,
37986 : 2<=FoldsCount<=NPoints.
37987 : recommended value: 10.
37988 :
37989 : OUTPUT PARAMETERS:
37990 : Info - return code, same as in MLPTrainLBFGS
37991 : Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
37992 : CVRep - generalization error estimates
37993 :
37994 : -- ALGLIB --
37995 : Copyright 09.12.2007 by Bochkanov Sergey
37996 : *************************************************************************/
37997 0 : void mlpkfoldcvlm(multilayerperceptron* network,
37998 : /* Real */ ae_matrix* xy,
37999 : ae_int_t npoints,
38000 : double decay,
38001 : ae_int_t restarts,
38002 : ae_int_t foldscount,
38003 : ae_int_t* info,
38004 : mlpreport* rep,
38005 : mlpcvreport* cvrep,
38006 : ae_state *_state)
38007 : {
38008 :
38009 0 : *info = 0;
38010 0 : _mlpreport_clear(rep);
38011 0 : _mlpcvreport_clear(cvrep);
38012 :
38013 0 : mlptrain_mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, ae_true, 0.0, 0, info, rep, cvrep, _state);
38014 0 : }
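/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Same setup as the MLPKFoldCVLBFGS sketch; the Levenberg-Marquardt variant
simply drops WStep and MaxIts. Dataset and names remain illustrative.

    #include "dataanalysis.h"

    int main()
    {
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);
        alglib::ae_int_t info;
        alglib::mlpreport rep;
        alglib::mlpcvreport cvrep;
        alglib::mlpkfoldcvlm(net, xy, 4, 0.001, 2, 4, info, rep, cvrep);
        return 0;
    }
*************************************************************************/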
38015 :
38016 :
38017 : /*************************************************************************
38018 : This function estimates generalization error using cross-validation on the
38019 : current dataset with current training settings.
38020 :
38021 : ! COMMERCIAL EDITION OF ALGLIB:
38022 : !
38023 : ! Commercial Edition of ALGLIB includes following important improvements
38024 : ! of this function:
38025 : ! * high-performance native backend with same C# interface (C# version)
38026 : ! * multithreading support (C++ and C# versions)
38027 : !
38028 : ! We recommend you to read 'Working with commercial version' section of
38029 : ! ALGLIB Reference Manual in order to find out how to use performance-
38030 : ! related features provided by commercial edition of ALGLIB.
38031 :
38032 : INPUT PARAMETERS:
38033 : S - trainer object
38034 : Network - neural network. It must have same number of inputs and
38035 : output/classes as was specified during creation of the
38036 : trainer object. Network is not changed during cross-
38037 : validation and is not trained - it is used only as a
38038 : representative of its architecture. I.e., we estimate
38039 : generalization properties of ARCHITECTURE, not some
38040 : specific network.
38041 : NRestarts - number of restarts, >=0:
38042 : * NRestarts>0 means that for each cross-validation
38043 : round specified number of random restarts is
38044 : performed, with best network being chosen after
38045 : training.
38046 : * NRestarts=0 is same as NRestarts=1
38047 : FoldsCount - number of folds in k-fold cross-validation:
38048 : * 2<=FoldsCount<=size of dataset
38049 : * recommended value: 10.
38050 : * values larger than dataset size will be silently
38051 : truncated down to dataset size
38052 :
38053 : OUTPUT PARAMETERS:
38054 : Rep - structure which contains cross-validation estimates:
38055 : * Rep.RelCLSError - fraction of misclassified cases.
38056 : * Rep.AvgCE - average cross-entropy
38057 : * Rep.RMSError - root-mean-square error
38058 : * Rep.AvgError - average error
38059 : * Rep.AvgRelError - average relative error
38060 :
38061 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
38062 : or subset with only one point was given, zeros are returned as
38063 : estimates.
38064 :
38065 : NOTE: this method performs FoldsCount cross-validation rounds, each one
38066 : with NRestarts random starts. Thus, FoldsCount*NRestarts networks
38067 : are trained in total.
38068 :
38069 : NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.
38070 :
38071 : NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
38072 : contain errors in prediction of posterior probabilities.
38073 :
38074 : -- ALGLIB --
38075 : Copyright 23.07.2012 by Bochkanov Sergey
38076 : *************************************************************************/
38077 0 : void mlpkfoldcv(mlptrainer* s,
38078 : multilayerperceptron* network,
38079 : ae_int_t nrestarts,
38080 : ae_int_t foldscount,
38081 : mlpreport* rep,
38082 : ae_state *_state)
38083 : {
38084 : ae_frame _frame_block;
38085 : ae_shared_pool pooldatacv;
38086 : mlpparallelizationcv datacv;
38087 : mlpparallelizationcv *sdatacv;
38088 : ae_smart_ptr _sdatacv;
38089 : ae_matrix cvy;
38090 : ae_vector folds;
38091 : ae_vector buf;
38092 : ae_vector dy;
38093 : ae_int_t nin;
38094 : ae_int_t nout;
38095 : ae_int_t wcount;
38096 : ae_int_t rowsize;
38097 : ae_int_t ntype;
38098 : ae_int_t ttype;
38099 : ae_int_t i;
38100 : ae_int_t j;
38101 : ae_int_t k;
38102 : hqrndstate rs;
38103 :
38104 0 : ae_frame_make(_state, &_frame_block);
38105 0 : memset(&pooldatacv, 0, sizeof(pooldatacv));
38106 0 : memset(&datacv, 0, sizeof(datacv));
38107 0 : memset(&_sdatacv, 0, sizeof(_sdatacv));
38108 0 : memset(&cvy, 0, sizeof(cvy));
38109 0 : memset(&folds, 0, sizeof(folds));
38110 0 : memset(&buf, 0, sizeof(buf));
38111 0 : memset(&dy, 0, sizeof(dy));
38112 0 : memset(&rs, 0, sizeof(rs));
38113 0 : _mlpreport_clear(rep);
38114 0 : ae_shared_pool_init(&pooldatacv, _state, ae_true);
38115 0 : _mlpparallelizationcv_init(&datacv, _state, ae_true);
38116 0 : ae_smart_ptr_init(&_sdatacv, (void**)&sdatacv, _state, ae_true);
38117 0 : ae_matrix_init(&cvy, 0, 0, DT_REAL, _state, ae_true);
38118 0 : ae_vector_init(&folds, 0, DT_INT, _state, ae_true);
38119 0 : ae_vector_init(&buf, 0, DT_REAL, _state, ae_true);
38120 0 : ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
38121 0 : _hqrndstate_init(&rs, _state, ae_true);
38122 :
38123 0 : if( !mlpissoftmax(network, _state) )
38124 : {
38125 0 : ntype = 0;
38126 : }
38127 : else
38128 : {
38129 0 : ntype = 1;
38130 : }
38131 0 : if( s->rcpar )
38132 : {
38133 0 : ttype = 0;
38134 : }
38135 : else
38136 : {
38137 0 : ttype = 1;
38138 : }
38139 0 : ae_assert(ntype==ttype, "MLPKFoldCV: type of input network is not similar to network type in trainer object", _state);
38140 0 : ae_assert(s->npoints>=0, "MLPKFoldCV: possible trainer S is not initialized(S.NPoints<0)", _state);
38141 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
38142 0 : ae_assert(s->nin==nin, "MLPKFoldCV: number of inputs in trainer is not equal to number of inputs in network", _state);
38143 0 : ae_assert(s->nout==nout, "MLPKFoldCV: number of outputs in trainer is not equal to number of outputs in network", _state);
38144 0 : ae_assert(nrestarts>=0, "MLPKFoldCV: NRestarts<0", _state);
38145 0 : ae_assert(foldscount>=2, "MLPKFoldCV: FoldsCount<2", _state);
38146 0 : if( foldscount>s->npoints )
38147 : {
38148 0 : foldscount = s->npoints;
38149 : }
38150 0 : rep->relclserror = (double)(0);
38151 0 : rep->avgce = (double)(0);
38152 0 : rep->rmserror = (double)(0);
38153 0 : rep->avgerror = (double)(0);
38154 0 : rep->avgrelerror = (double)(0);
38155 0 : hqrndrandomize(&rs, _state);
38156 0 : rep->ngrad = 0;
38157 0 : rep->nhess = 0;
38158 0 : rep->ncholesky = 0;
38159 0 : if( s->npoints==0||s->npoints==1 )
38160 : {
38161 0 : ae_frame_leave(_state);
38162 0 : return;
38163 : }
38164 :
38165 : /*
38166 : * Read network geometry, test parameters
38167 : */
38168 0 : if( s->rcpar )
38169 : {
38170 0 : rowsize = nin+nout;
38171 0 : ae_vector_set_length(&dy, nout, _state);
38172 0 : dserrallocate(-nout, &buf, _state);
38173 : }
38174 : else
38175 : {
38176 0 : rowsize = nin+1;
38177 0 : ae_vector_set_length(&dy, 1, _state);
38178 0 : dserrallocate(nout, &buf, _state);
38179 : }
38180 :
38181 : /*
38182 : * Folds
38183 : */
38184 0 : ae_vector_set_length(&folds, s->npoints, _state);
38185 0 : for(i=0; i<=s->npoints-1; i++)
38186 : {
38187 0 : folds.ptr.p_int[i] = i*foldscount/s->npoints;
38188 : }
38189 0 : for(i=0; i<=s->npoints-2; i++)
38190 : {
38191 0 : j = i+hqrnduniformi(&rs, s->npoints-i, _state);
38192 0 : if( j!=i )
38193 : {
38194 0 : k = folds.ptr.p_int[i];
38195 0 : folds.ptr.p_int[i] = folds.ptr.p_int[j];
38196 0 : folds.ptr.p_int[j] = k;
38197 : }
38198 : }
38199 0 : ae_matrix_set_length(&cvy, s->npoints, nout, _state);
38200 :
38201 : /*
38202 : * Initialize SEED-value for shared pool
38203 : */
38204 0 : datacv.ngrad = 0;
38205 0 : mlpcopy(network, &datacv.network, _state);
38206 0 : ae_vector_set_length(&datacv.subset, s->npoints, _state);
38207 0 : ae_vector_set_length(&datacv.xyrow, rowsize, _state);
38208 0 : ae_vector_set_length(&datacv.y, nout, _state);
38209 :
38210 : /*
38211 : * Create shared pool
38212 : */
38213 0 : ae_shared_pool_set_seed(&pooldatacv, &datacv, sizeof(datacv), _mlpparallelizationcv_init, _mlpparallelizationcv_init_copy, _mlpparallelizationcv_destroy, _state);
38214 :
38215 : /*
38216 : * Parallelization
38217 : */
38218 0 : mlptrain_mthreadcv(s, rowsize, nrestarts, &folds, 0, foldscount, &cvy, &pooldatacv, wcount, _state);
38219 :
38220 : /*
38221 : * Calculate value for NGrad
38222 : */
38223 0 : ae_shared_pool_first_recycled(&pooldatacv, &_sdatacv, _state);
38224 0 : while(sdatacv!=NULL)
38225 : {
38226 0 : rep->ngrad = rep->ngrad+sdatacv->ngrad;
38227 0 : ae_shared_pool_next_recycled(&pooldatacv, &_sdatacv, _state);
38228 : }
38229 :
38230 : /*
38231 : * Combine results and calculate cross-validation error
38232 : */
38233 0 : for(i=0; i<=s->npoints-1; i++)
38234 : {
38235 0 : if( s->datatype==0 )
38236 : {
38237 0 : ae_v_move(&datacv.xyrow.ptr.p_double[0], 1, &s->densexy.ptr.pp_double[i][0], 1, ae_v_len(0,rowsize-1));
38238 : }
38239 0 : if( s->datatype==1 )
38240 : {
38241 0 : sparsegetrow(&s->sparsexy, i, &datacv.xyrow, _state);
38242 : }
38243 0 : ae_v_move(&datacv.y.ptr.p_double[0], 1, &cvy.ptr.pp_double[i][0], 1, ae_v_len(0,nout-1));
38244 0 : if( s->rcpar )
38245 : {
38246 0 : ae_v_move(&dy.ptr.p_double[0], 1, &datacv.xyrow.ptr.p_double[nin], 1, ae_v_len(0,nout-1));
38247 : }
38248 : else
38249 : {
38250 0 : dy.ptr.p_double[0] = datacv.xyrow.ptr.p_double[nin];
38251 : }
38252 0 : dserraccumulate(&buf, &datacv.y, &dy, _state);
38253 : }
38254 0 : dserrfinish(&buf, _state);
38255 0 : rep->relclserror = buf.ptr.p_double[0];
38256 0 : rep->avgce = buf.ptr.p_double[1];
38257 0 : rep->rmserror = buf.ptr.p_double[2];
38258 0 : rep->avgerror = buf.ptr.p_double[3];
38259 0 : rep->avgrelerror = buf.ptr.p_double[4];
38260 0 : ae_frame_leave(_state);
38261 : }
38262 :
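/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Trainer-based cross-validation with MLPKFoldCV; the two-class toy set
and the network geometry are illustrative assumptions.

    #include "dataanalysis.h"

    int main()
    {
        alglib::mlptrainer trn;
        alglib::mlpcreatetrainercls(2, 2, trn);   // 2 inputs, 2 classes
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]"; // last column = class
        alglib::mlpsetdataset(trn, xy, 4);
        alglib::multilayerperceptron net;
        alglib::mlpcreatec1(2, 5, 2, net);        // classifier architecture only
        alglib::mlpreport rep;
        alglib::mlpkfoldcv(trn, net, 3, 4, rep);  // 3 restarts per fold, 4 folds
        // rep.relclserror / rep.avgce hold the CV estimates
        return 0;
    }
*************************************************************************/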
38263 :
38264 : /*************************************************************************
38265 : Creation of the network trainer object for regression networks
38266 :
38267 : INPUT PARAMETERS:
38268 : NIn - number of inputs, NIn>=1
38269 : NOut - number of outputs, NOut>=1
38270 :
38271 : OUTPUT PARAMETERS:
38272 : S - neural network trainer object.
38273 : This structure can be used to train any regression
38274 : network with NIn inputs and NOut outputs.
38275 :
38276 : -- ALGLIB --
38277 : Copyright 23.07.2012 by Bochkanov Sergey
38278 : *************************************************************************/
38279 0 : void mlpcreatetrainer(ae_int_t nin,
38280 : ae_int_t nout,
38281 : mlptrainer* s,
38282 : ae_state *_state)
38283 : {
38284 :
38285 0 : _mlptrainer_clear(s);
38286 :
38287 0 : ae_assert(nin>=1, "MLPCreateTrainer: NIn<1.", _state);
38288 0 : ae_assert(nout>=1, "MLPCreateTrainer: NOut<1.", _state);
38289 0 : s->nin = nin;
38290 0 : s->nout = nout;
38291 0 : s->rcpar = ae_true;
38292 0 : s->lbfgsfactor = mlptrain_defaultlbfgsfactor;
38293 0 : s->decay = 1.0E-6;
38294 0 : mlpsetcond(s, (double)(0), 0, _state);
38295 0 : s->datatype = 0;
38296 0 : s->npoints = 0;
38297 0 : mlpsetalgobatch(s, _state);
38298 0 : }
38299 :
38300 :
38301 : /*************************************************************************
38302 : Creation of the network trainer object for classification networks
38303 :
38304 : INPUT PARAMETERS:
38305 : NIn - number of inputs, NIn>=1
38306 : NClasses - number of classes, NClasses>=2
38307 :
38308 : OUTPUT PARAMETERS:
38309 : S - neural network trainer object.
38310 : This structure can be used to train any classification
38311 : network with NIn inputs and NOut outputs.
38312 :
38313 : -- ALGLIB --
38314 : Copyright 23.07.2012 by Bochkanov Sergey
38315 : *************************************************************************/
38316 0 : void mlpcreatetrainercls(ae_int_t nin,
38317 : ae_int_t nclasses,
38318 : mlptrainer* s,
38319 : ae_state *_state)
38320 : {
38321 :
38322 0 : _mlptrainer_clear(s);
38323 :
38324 0 : ae_assert(nin>=1, "MLPCreateTrainerCls: NIn<1.", _state);
38325 0 : ae_assert(nclasses>=2, "MLPCreateTrainerCls: NClasses<2.", _state);
38326 0 : s->nin = nin;
38327 0 : s->nout = nclasses;
38328 0 : s->rcpar = ae_false;
38329 0 : s->lbfgsfactor = mlptrain_defaultlbfgsfactor;
38330 0 : s->decay = 1.0E-6;
38331 0 : mlpsetcond(s, (double)(0), 0, _state);
38332 0 : s->datatype = 0;
38333 0 : s->npoints = 0;
38334 0 : mlpsetalgobatch(s, _state);
38335 0 : }
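/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Creating the two kinds of trainer objects; the input/output counts are
illustrative assumptions.

    #include "dataanalysis.h"

    void make_trainers()
    {
        alglib::mlptrainer treg, tcls;
        alglib::mlpcreatetrainer(4, 1, treg);     // any 4-input/1-output regression net
        alglib::mlpcreatetrainercls(4, 3, tcls);  // any 4-input/3-class classifier
    }
*************************************************************************/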
38336 :
38337 :
38338 : /*************************************************************************
38339 : This function sets "current dataset" of the trainer object to one passed
38340 : by user.
38341 :
38342 : INPUT PARAMETERS:
38343 : S - trainer object
38344 : XY - training set, see below for information on the
38345 : training set format. This function checks correctness
38346 : of the dataset (no NANs/INFs, class numbers are
38347 : correct) and throws exception when incorrect dataset
38348 : is passed.
38349 : NPoints - points count, >=0.
38350 :
38351 : DATASET FORMAT:
38352 :
38353 : This function uses two different dataset formats - one for regression
38354 : networks, another one for classification networks.
38355 :
38356 : For regression networks with NIn inputs and NOut outputs the following
38357 : dataset format is used:
38358 : * dataset is given by NPoints*(NIn+NOut) matrix
38359 : * each row corresponds to one example
38360 : * first NIn columns are inputs, next NOut columns are outputs
38361 :
38362 : For classification networks with NIn inputs and NClasses classes the
38363 : following dataset format is used:
38364 : * dataset is given by NPoints*(NIn+1) matrix
38365 : * each row corresponds to one example
38366 : * first NIn columns are inputs, last column stores class number (from 0 to
38367 : NClasses-1).
38368 :
38369 : -- ALGLIB --
38370 : Copyright 23.07.2012 by Bochkanov Sergey
38371 : *************************************************************************/
38372 0 : void mlpsetdataset(mlptrainer* s,
38373 : /* Real */ ae_matrix* xy,
38374 : ae_int_t npoints,
38375 : ae_state *_state)
38376 : {
38377 : ae_int_t ndim;
38378 : ae_int_t i;
38379 : ae_int_t j;
38380 :
38381 :
38382 0 : ae_assert(s->nin>=1, "MLPSetDataset: possible parameter S is not initialized or spoiled(S.NIn<=0).", _state);
38383 0 : ae_assert(npoints>=0, "MLPSetDataset: NPoints<0", _state);
38384 0 : ae_assert(npoints<=xy->rows, "MLPSetDataset: invalid size of matrix XY(NPoints more than rows of matrix XY)", _state);
38385 0 : s->datatype = 0;
38386 0 : s->npoints = npoints;
38387 0 : if( npoints==0 )
38388 : {
38389 0 : return;
38390 : }
38391 0 : if( s->rcpar )
38392 : {
38393 0 : ae_assert(s->nout>=1, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression).", _state);
38394 0 : ndim = s->nin+s->nout;
38395 0 : ae_assert(ndim<=xy->cols, "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY).", _state);
38396 0 : ae_assert(apservisfinitematrix(xy, npoints, ndim, _state), "MLPSetDataset: parameter XY contains Infinite or NaN.", _state);
38397 : }
38398 : else
38399 : {
38400 0 : ae_assert(s->nout>=2, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier).", _state);
38401 0 : ndim = s->nin+1;
38402 0 : ae_assert(ndim<=xy->cols, "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY).", _state);
38403 0 : ae_assert(apservisfinitematrix(xy, npoints, ndim, _state), "MLPSetDataset: parameter XY contains Infinite or NaN.", _state);
38404 0 : for(i=0; i<=npoints-1; i++)
38405 : {
38406 0 : ae_assert(ae_round(xy->ptr.pp_double[i][s->nin], _state)>=0&&ae_round(xy->ptr.pp_double[i][s->nin], _state)<s->nout, "MLPSetDataset: invalid parameter XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses).", _state);
38407 : }
38408 : }
38409 0 : rmatrixsetlengthatleast(&s->densexy, npoints, ndim, _state);
38410 0 : for(i=0; i<=npoints-1; i++)
38411 : {
38412 0 : for(j=0; j<=ndim-1; j++)
38413 : {
38414 0 : s->densexy.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
38415 : }
38416 : }
38417 : }
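/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
The two dense dataset layouts described above, side by side; all values
and names are illustrative assumptions.

    #include "dataanalysis.h"

    void set_datasets()
    {
        alglib::mlptrainer treg, tcls;
        alglib::mlpcreatetrainer(2, 1, treg);
        alglib::mlpcreatetrainercls(2, 3, tcls);
        // Regression (NIn=2, NOut=1): each row is [in0, in1, out0]
        alglib::real_2d_array rxy = "[[0.1,0.2,0.7],[0.5,0.4,0.9]]";
        alglib::mlpsetdataset(treg, rxy, 2);
        // Classification (NIn=2, NClasses=3): last column is class index 0..2
        alglib::real_2d_array cxy = "[[0.1,0.2,0],[0.5,0.4,2]]";
        alglib::mlpsetdataset(tcls, cxy, 2);
    }
*************************************************************************/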
38418 :
38419 :
38420 : /*************************************************************************
38421 : This function sets "current dataset" of the trainer object to one passed
38422 : by user (sparse matrix is used to store dataset).
38423 :
38424 : INPUT PARAMETERS:
38425 : S - trainer object
38426 : XY - training set, see below for information on the
38427 : training set format. This function checks correctness
38428 : of the dataset (no NANs/INFs, class numbers are
38429 : correct) and throws exception when incorrect dataset
38430 : is passed. Any sparse storage format can be used:
38431 : Hash-table, CRS...
38432 : NPoints - points count, >=0
38433 :
38434 : DATASET FORMAT:
38435 :
38436 : This function uses two different dataset formats - one for regression
38437 : networks, another one for classification networks.
38438 :
38439 : For regression networks with NIn inputs and NOut outputs the following
38440 : dataset format is used:
38441 : * dataset is given by NPoints*(NIn+NOut) matrix
38442 : * each row corresponds to one example
38443 : * first NIn columns are inputs, next NOut columns are outputs
38444 :
38445 : For classification networks with NIn inputs and NClasses classes the
38446 : following dataset format is used:
38447 : * dataset is given by NPoints*(NIn+1) matrix
38448 : * each row corresponds to one example
38449 : * first NIn columns are inputs, last column stores class number (from 0 to
38450 : NClasses-1).
38451 :
38452 : -- ALGLIB --
38453 : Copyright 23.07.2012 by Bochkanov Sergey
38454 : *************************************************************************/
38455 0 : void mlpsetsparsedataset(mlptrainer* s,
38456 : sparsematrix* xy,
38457 : ae_int_t npoints,
38458 : ae_state *_state)
38459 : {
38460 : double v;
38461 : ae_int_t t0;
38462 : ae_int_t t1;
38463 : ae_int_t i;
38464 : ae_int_t j;
38465 :
38466 :
38467 :
38468 : /*
38469 : * Check correctness of the data
38470 : */
38471 0 : ae_assert(s->nin>0, "MLPSetSparseDataset: possible parameter S is not initialized or spoiled(S.NIn<=0).", _state);
38472 0 : ae_assert(npoints>=0, "MLPSetSparseDataset: NPoints<0", _state);
38473 0 : ae_assert(npoints<=sparsegetnrows(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(NPoints more than rows of matrix XY)", _state);
38474 0 : if( npoints>0 )
38475 : {
38476 0 : t0 = 0;
38477 0 : t1 = 0;
38478 0 : if( s->rcpar )
38479 : {
38480 0 : ae_assert(s->nout>=1, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression).", _state);
38481 0 : ae_assert(s->nin+s->nout<=sparsegetncols(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY).", _state);
38482 0 : while(sparseenumerate(xy, &t0, &t1, &i, &j, &v, _state))
38483 : {
38484 0 : if( i<npoints&&j<s->nin+s->nout )
38485 : {
38486 0 : ae_assert(ae_isfinite(v, _state), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN.", _state);
38487 : }
38488 : }
38489 : }
38490 : else
38491 : {
38492 0 : ae_assert(s->nout>=2, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier).", _state);
38493 0 : ae_assert(s->nin+1<=sparsegetncols(xy, _state), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY).", _state);
38494 0 : while(sparseenumerate(xy, &t0, &t1, &i, &j, &v, _state))
38495 : {
38496 0 : if( i<npoints&&j<=s->nin )
38497 : {
38498 0 : if( j!=s->nin )
38499 : {
38500 0 : ae_assert(ae_isfinite(v, _state), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN.", _state);
38501 : }
38502 : else
38503 : {
38504 0 : ae_assert((ae_isfinite(v, _state)&&ae_round(v, _state)>=0)&&ae_round(v, _state)<s->nout, "MLPSetSparseDataset: invalid sparse matrix XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses).", _state);
38505 : }
38506 : }
38507 : }
38508 : }
38509 : }
38510 :
38511 : /*
38512 : * Set dataset
38513 : */
38514 0 : s->datatype = 1;
38515 0 : s->npoints = npoints;
38516 0 : sparsecopytocrs(xy, &s->sparsexy, _state);
38517 0 : }
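/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Passing a sparse dataset to the trainer; per the docstring any storage
format is accepted. The tiny classification set is an illustrative
assumption.

    #include "dataanalysis.h"

    void set_sparse_dataset()
    {
        alglib::mlptrainer tcls;
        alglib::mlpcreatetrainercls(2, 3, tcls);
        // 2 points, NIn+1=3 columns; unspecified entries are zero
        alglib::sparsematrix sxy;
        alglib::sparsecreate(2, 3, sxy);      // hash-table storage is fine
        alglib::sparseset(sxy, 0, 0, 0.1);
        alglib::sparseset(sxy, 0, 2, 2);      // class index in last column
        alglib::sparseset(sxy, 1, 1, 0.4);    // point 1 keeps default class 0
        alglib::mlpsetsparsedataset(tcls, sxy, 2);
    }
*************************************************************************/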
38518 :
38519 :
38520 : /*************************************************************************
38521 : This function sets weight decay coefficient which is used for training.
38522 :
38523 : INPUT PARAMETERS:
38524 : S - trainer object
38525 : Decay - weight decay coefficient, >=0. Weight decay term
38526 : 'Decay*||Weights||^2' is added to error function. If
38527 : you don't know what Decay to choose, use 1.0E-3.
38528 : Weight decay can be set to zero, in this case network
38529 : is trained without weight decay.
38530 :
38531 : NOTE: by default network uses some small nonzero value for weight decay.
38532 :
38533 : -- ALGLIB --
38534 : Copyright 23.07.2012 by Bochkanov Sergey
38535 : *************************************************************************/
38536 0 : void mlpsetdecay(mlptrainer* s, double decay, ae_state *_state)
38537 : {
38538 :
38539 :
38540 0 : ae_assert(ae_isfinite(decay, _state), "MLPSetDecay: parameter Decay contains Infinite or NaN.", _state);
38541 0 : ae_assert(ae_fp_greater_eq(decay,(double)(0)), "MLPSetDecay: Decay<0.", _state);
38542 0 : s->decay = decay;
38543 0 : }
38544 :
38545 :
38546 : /*************************************************************************
38547 : This function sets stopping criteria for the optimizer.
38548 :
38549 : INPUT PARAMETERS:
38550 : S - trainer object
38551 : WStep - stopping criterion. Algorithm stops if step size is
38552 : less than WStep. Recommended value - 0.01. Zero step
38553 : size means stopping after MaxIts iterations.
38554 : WStep>=0.
38555 : MaxIts - stopping criterion. Algorithm stops after MaxIts
38556 : epochs (full passes over entire dataset). Zero MaxIts
38557 : means stopping when step is sufficiently small.
38558 : MaxIts>=0.
38559 :
38560 : NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also
38561 : used when MLPSetCond() is called with WStep=0 and MaxIts=0.
38562 :
38563 : NOTE: these stopping criteria are used for all kinds of neural training -
38564 : from "conventional" networks to early stopping ensembles. When used
38565 : for "conventional" networks, they are used as the only stopping
38566 : criteria. When combined with early stopping, they used as ADDITIONAL
38567 : criteria. When combined with early stopping, they are used as ADDITIONAL
38568 :
38569 : -- ALGLIB --
38570 : Copyright 23.07.2012 by Bochkanov Sergey
38571 : *************************************************************************/
38572 0 : void mlpsetcond(mlptrainer* s,
38573 : double wstep,
38574 : ae_int_t maxits,
38575 : ae_state *_state)
38576 : {
38577 :
38578 :
38579 0 : ae_assert(ae_isfinite(wstep, _state), "MLPSetCond: parameter WStep contains Infinite or NaN.", _state);
38580 0 : ae_assert(ae_fp_greater_eq(wstep,(double)(0)), "MLPSetCond: WStep<0.", _state);
38581 0 : ae_assert(maxits>=0, "MLPSetCond: MaxIts<0.", _state);
38582 0 : if( ae_fp_neq(wstep,(double)(0))||maxits!=0 )
38583 : {
38584 0 : s->wstep = wstep;
38585 0 : s->maxits = maxits;
38586 : }
38587 : else
38588 : {
38589 0 : s->wstep = 0.005;
38590 0 : s->maxits = 0;
38591 : }
38592 0 : }
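/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
Configuring regularization and stopping criteria on an existing trainer;
the concrete values are illustrative assumptions.

    #include "dataanalysis.h"

    void configure(alglib::mlptrainer &trn)
    {
        alglib::mlpsetdecay(trn, 0.001);   // Decay*||Weights||^2 added to error
        alglib::mlpsetcond(trn, 0.01, 0);  // stop when step size drops below 0.01
        // mlpsetcond(trn, 0, 0) would restore defaults WStep=0.005, MaxIts=0
    }
*************************************************************************/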
38593 :
38594 :
38595 : /*************************************************************************
38596 : This function sets training algorithm: batch training using L-BFGS will be
38597 : used.
38598 :
38599 : This algorithm:
38600 : * is the most robust for small-scale problems, but may be too slow for
38601 : large-scale ones.
38602 : * performs a full pass through the dataset before performing a step
38603 : * uses conditions specified by MLPSetCond() for stopping
38604 : * is the default one used by the trainer object
38605 :
38606 : INPUT PARAMETERS:
38607 : S - trainer object
38608 :
38609 : -- ALGLIB --
38610 : Copyright 23.07.2012 by Bochkanov Sergey
38611 : *************************************************************************/
38612 0 : void mlpsetalgobatch(mlptrainer* s, ae_state *_state)
38613 : {
38614 :
38615 :
38616 0 : s->algokind = 0;
38617 0 : }
38618 :
38619 :
38620 : /*************************************************************************
38621 : This function trains neural network passed to this function, using current
38622 : dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
38623 : and current training settings. Training from NRestarts random starting
38624 : positions is performed, best network is chosen.
38625 :
38626 : Training is performed using current training algorithm.
38627 :
38628 : ! COMMERCIAL EDITION OF ALGLIB:
38629 : !
38630 : ! Commercial Edition of ALGLIB includes following important improvements
38631 : ! of this function:
38632 : ! * high-performance native backend with same C# interface (C# version)
38633 : ! * multithreading support (C++ and C# versions)
38634 : !
38635 : ! We recommend you to read 'Working with commercial version' section of
38636 : ! ALGLIB Reference Manual in order to find out how to use performance-
38637 : ! related features provided by commercial edition of ALGLIB.
38638 :
38639 : INPUT PARAMETERS:
38640 : S - trainer object
38641 : Network - neural network. It must have same number of inputs and
38642 : output/classes as was specified during creation of the
38643 : trainer object.
38644 : NRestarts - number of restarts, >=0:
38645 : * NRestarts>0 means that specified number of random
38646 : restarts are performed, best network is chosen after
38647 : training
38648 : * NRestarts=0 means that current state of the network
38649 : is used for training.
38650 :
38651 : OUTPUT PARAMETERS:
38652 : Network - trained network
38653 :
38654 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
38655 : network is filled by zero values. Same behavior for functions
38656 : MLPStartTraining and MLPContinueTraining.
38657 :
38658 : NOTE: this method uses sum-of-squares error function for training.
38659 :
38660 : -- ALGLIB --
38661 : Copyright 23.07.2012 by Bochkanov Sergey
38662 : *************************************************************************/
38663 0 : void mlptrainnetwork(mlptrainer* s,
38664 : multilayerperceptron* network,
38665 : ae_int_t nrestarts,
38666 : mlpreport* rep,
38667 : ae_state *_state)
38668 : {
38669 : ae_frame _frame_block;
38670 : ae_int_t nin;
38671 : ae_int_t nout;
38672 : ae_int_t wcount;
38673 : ae_int_t ntype;
38674 : ae_int_t ttype;
38675 : ae_shared_pool trnpool;
38676 :
38677 0 : ae_frame_make(_state, &_frame_block);
38678 0 : memset(&trnpool, 0, sizeof(trnpool));
38679 0 : _mlpreport_clear(rep);
38680 0 : ae_shared_pool_init(&trnpool, _state, ae_true);
38681 :
38682 0 : ae_assert(s->npoints>=0, "MLPTrainNetwork: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
38683 0 : if( !mlpissoftmax(network, _state) )
38684 : {
38685 0 : ntype = 0;
38686 : }
38687 : else
38688 : {
38689 0 : ntype = 1;
38690 : }
38691 0 : if( s->rcpar )
38692 : {
38693 0 : ttype = 0;
38694 : }
38695 : else
38696 : {
38697 0 : ttype = 1;
38698 : }
38699 0 : ae_assert(ntype==ttype, "MLPTrainNetwork: type of input network is not similar to network type in trainer object", _state);
38700 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
38701 0 : ae_assert(s->nin==nin, "MLPTrainNetwork: number of inputs in trainer is not equal to number of inputs in network", _state);
38702 0 : ae_assert(s->nout==nout, "MLPTrainNetwork: number of outputs in trainer is not equal to number of outputs in network", _state);
38703 0 : ae_assert(nrestarts>=0, "MLPTrainNetwork: NRestarts<0.", _state);
38704 :
38705 : /*
38706 : * Train
38707 : */
38708 0 : mlptrain_mlptrainnetworkx(s, nrestarts, -1, &s->subset, -1, &s->subset, 0, network, rep, ae_true, &trnpool, _state);
38709 0 : ae_frame_leave(_state);
38710 0 : }
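/*************************************************************************
EDITOR'S NOTE - usage sketch, not part of the instrumented source above.
The full trainer-based workflow ending in MLPTrainNetwork; dataset,
geometry and settings are illustrative assumptions.

    #include "dataanalysis.h"

    int main()
    {
        alglib::mlptrainer trn;
        alglib::mlpcreatetrainer(2, 1, trn);           // regression trainer
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::mlpsetdataset(trn, xy, 4);
        alglib::mlpsetdecay(trn, 0.001);
        alglib::mlpsetcond(trn, 0.01, 0);              // stop on small step
        alglib::multilayerperceptron net;
        alglib::mlpcreate1(2, 3, 1, net);
        alglib::mlpreport rep;
        alglib::mlptrainnetwork(trn, net, 5, rep);     // best of 5 random restarts
        return 0;
    }
*************************************************************************/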
38711 :
38712 :
38713 : /*************************************************************************
38714 : IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
38715 : not recommend you to use it unless you are pretty sure that you
38716 : need ability to monitor training progress.
38717 :
38718 : This function performs step-by-step training of the neural network. Here
38719 : "step-by-step" means that training starts with MLPStartTraining() call,
38720 : and then user subsequently calls MLPContinueTraining() to perform one more
38721 : iteration of the training.
38722 :
38723 : After call to this function trainer object remembers network and is ready
38724 : to train it. However, no training is performed until first call to
38725 : MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
38726 : will advance training progress one iteration further.
38727 :
38728 : EXAMPLE:
38729 : >
38730 : > ...initialize network and trainer object....
38731 : >
38732 : > MLPStartTraining(Trainer, Network, True)
38733 : > while MLPContinueTraining(Trainer, Network) do
38734 : > ...visualize training progress...
38735 : >
38736 :
38737 : INPUT PARAMETERS:
38738 : S - trainer object
38739 : Network - neural network. It must have the same number of inputs
38740 : and outputs/classes as was specified during creation
38741 : of the trainer object.
38742 : RandomStart - whether to randomize the network before training:
38743 : * True means that the network is randomized and its
38744 : initial state (the one which was passed to the
38745 : trainer object) is lost.
38746 : * False means that training is started from the
38747 : current state of the network
38748 :
38749 : OUTPUT PARAMETERS:
38750 : Network - neural network which is ready for training (weights are
38751 : initialized, preprocessor is initialized using the
38752 : current training set)
38753 :
38754 : NOTE: this method uses the sum-of-squares error function for training.
38755 :
38756 : NOTE: it is expected that trainer object settings are NOT changed during
38757 : step-by-step training, i.e. nobody changes the stopping criteria or
38758 : the training set mid-training. Such changes are possible and there
38759 : is no defense against them, but algorithm behavior in such cases is
38760 : undefined and can be unpredictable.
38761 :
38762 : -- ALGLIB --
38763 : Copyright 23.07.2012 by Bochkanov Sergey
38764 : *************************************************************************/
38765 0 : void mlpstarttraining(mlptrainer* s,
38766 : multilayerperceptron* network,
38767 : ae_bool randomstart,
38768 : ae_state *_state)
38769 : {
38770 : ae_int_t nin;
38771 : ae_int_t nout;
38772 : ae_int_t wcount;
38773 : ae_int_t ntype;
38774 : ae_int_t ttype;
38775 :
38776 :
38777 0 : ae_assert(s->npoints>=0, "MLPStartTraining: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
38778 0 : if( !mlpissoftmax(network, _state) )
38779 : {
38780 0 : ntype = 0;
38781 : }
38782 : else
38783 : {
38784 0 : ntype = 1;
38785 : }
38786 0 : if( s->rcpar )
38787 : {
38788 0 : ttype = 0;
38789 : }
38790 : else
38791 : {
38792 0 : ttype = 1;
38793 : }
38794 0 : ae_assert(ntype==ttype, "MLPStartTraining: type of input network is not similar to network type in trainer object", _state);
38795 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
38796 0 : ae_assert(s->nin==nin, "MLPStartTraining: number of inputs in trainer is not equal to number of inputs in the network.", _state);
38797 0 : ae_assert(s->nout==nout, "MLPStartTraining: number of outputs in trainer is not equal to number of outputs in the network.", _state);
38798 :
38799 : /*
38800 : * Initialize temporaries
38801 : */
38802 0 : mlptrain_initmlptrnsession(network, randomstart, s, &s->session, _state);
38803 :
38804 : /*
38805 : * Train network
38806 : */
38807 0 : mlptrain_mlpstarttrainingx(s, randomstart, -1, &s->subset, -1, &s->session, _state);
38808 :
38809 : /*
38810 : * Update network
38811 : */
38812 0 : mlpcopytunableparameters(&s->session.network, network, _state);
38813 0 : }
38814 :
38815 :
38816 : /*************************************************************************
38817 : IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
38818 : not recommend using it unless you are sure that you need the
38819 : ability to monitor training progress.
38820 :
38821 : ! COMMERCIAL EDITION OF ALGLIB:
38822 : !
38823 : ! Commercial Edition of ALGLIB includes following important improvements
38824 : ! of this function:
38825 : ! * high-performance native backend with same C# interface (C# version)
38826 : ! * multithreading support (C++ and C# versions)
38827 : !
38828 : ! We recommend you to read 'Working with commercial version' section of
38829 : ! ALGLIB Reference Manual in order to find out how to use performance-
38830 : ! related features provided by commercial edition of ALGLIB.
38831 :
38832 : This function performs step-by-step training of the neural network. Here
38833 : "step-by-step" means that training starts with MLPStartTraining() call,
38834 : and then user subsequently calls MLPContinueTraining() to perform one more
38835 : iteration of the training.
38836 :
38837 : This function performs one more iteration of the training and returns
38838 : either True (training continues) or False (training stopped). If True
38839 : was returned, Network weights are updated according to the current state
38840 : of the optimization progress. If False was returned, no additional
38841 : updates are performed (the previous update of the network weights moved
38842 : us to the final point, and no additional update is needed).
38843 :
38844 : EXAMPLE:
38845 : >
38846 : > [initialize network and trainer object]
38847 : >
38848 : > MLPStartTraining(Trainer, Network, True)
38849 : > while MLPContinueTraining(Trainer, Network) do
38850 : > [visualize training progress]
38851 : >
38852 :
38853 : INPUT PARAMETERS:
38854 : S - trainer object
38855 : Network - neural network structure which is used to store the
38856 : current state of the training process.
38857 :
38858 : OUTPUT PARAMETERS:
38859 : Network - weights of the neural network are overwritten with
38860 : the current approximation.
38861 :
38862 : NOTE: this method uses the sum-of-squares error function for training.
38863 :
38864 : NOTE: it is expected that trainer object settings are NOT changed during
38865 : step-by-step training, i.e. nobody changes the stopping criteria or
38866 : the training set mid-training. Such changes are possible and there
38867 : is no defense against them, but algorithm behavior in such cases is
38868 : undefined and can be unpredictable.
38869 :
38870 : NOTE: It is expected that Network is the same one which was passed to
38871 : the MLPStartTraining() function. However, THIS function checks only
38872 : the following:
38873 : * that the number of network inputs is consistent with the trainer
38874 : object settings
38875 : * that the number of network outputs/classes is consistent with the
38876 : trainer object settings
38877 : * that the number of network weights is the same as the number of
38878 : weights in the network passed to MLPStartTraining()
38879 : An exception is thrown when these conditions are violated.
38880 :
38881 : It is also expected that you do not change the state of the network
38882 : on your own - the only party with the right to change the network
38883 : during training is the trainer object. Any attempt to interfere with
38884 : the trainer may lead to unpredictable results.
38885 :
38886 :
38887 : -- ALGLIB --
38888 : Copyright 23.07.2012 by Bochkanov Sergey
38889 : *************************************************************************/
38890 0 : ae_bool mlpcontinuetraining(mlptrainer* s,
38891 : multilayerperceptron* network,
38892 : ae_state *_state)
38893 : {
38894 : ae_int_t nin;
38895 : ae_int_t nout;
38896 : ae_int_t wcount;
38897 : ae_int_t ntype;
38898 : ae_int_t ttype;
38899 : ae_bool result;
38900 :
38901 :
38902 0 : ae_assert(s->npoints>=0, "MLPContinueTraining: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
38903 0 : if( s->rcpar )
38904 : {
38905 0 : ttype = 0;
38906 : }
38907 : else
38908 : {
38909 0 : ttype = 1;
38910 : }
38911 0 : if( !mlpissoftmax(network, _state) )
38912 : {
38913 0 : ntype = 0;
38914 : }
38915 : else
38916 : {
38917 0 : ntype = 1;
38918 : }
38919 0 : ae_assert(ntype==ttype, "MLPContinueTraining: type of input network is not similar to network type in trainer object.", _state);
38920 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
38921 0 : ae_assert(s->nin==nin, "MLPContinueTraining: number of inputs in trainer is not equal to number of inputs in the network.", _state);
38922 0 : ae_assert(s->nout==nout, "MLPContinueTraining: number of outputs in trainer is not equal to number of outputs in the network.", _state);
38923 0 : result = mlptrain_mlpcontinuetrainingx(s, &s->subset, -1, &s->ngradbatch, &s->session, _state);
38924 0 : if( result )
38925 : {
38926 0 : ae_v_move(&network->weights.ptr.p_double[0], 1, &s->session.network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
38927 : }
38928 0 : return result;
38929 : }
38930 :
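/*************************************************************************
A minimal step-by-step training sketch against the C++ interface declared
in dataanalysis.h. The toy dataset, layer sizes and variable names below
are illustrative assumptions, not requirements of the library:

    #include "dataanalysis.h"

    int main()
    {
        alglib::mlptrainer trn;
        alglib::multilayerperceptron net;

        // regression setting: 2 inputs, 1 output; rows are [x0, x1, y]
        alglib::real_2d_array xy = "[[0,0,0],[0,1,1],[1,0,1],[1,1,0]]";
        alglib::mlpcreatetrainer(2, 1, trn);
        alglib::mlpsetdataset(trn, xy, 4);
        alglib::mlpcreate1(2, 5, 1, net);  // one hidden layer, 5 neurons

        // each MLPContinueTraining() call advances the optimizer by one
        // iteration and rewrites Net's weights with the current iterate
        alglib::mlpstarttraining(trn, net, true);
        while( alglib::mlpcontinuetraining(trn, net) )
        {
            // ...visualize training progress...
        }
        return 0;
    }
*************************************************************************/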
38931 :
38932 : /*************************************************************************
38933 : Training neural networks ensemble using bootstrap aggregating (bagging).
38934 : Modified Levenberg-Marquardt algorithm is used as base training method.
38935 :
38936 : INPUT PARAMETERS:
38937 : Ensemble - model with initialized geometry
38938 : XY - training set
38939 : NPoints - training set size
38940 : Decay - weight decay coefficient, >=0.001
38941 : Restarts - restarts, >0.
38942 :
38943 : OUTPUT PARAMETERS:
38944 : Ensemble - trained model
38945 : Info - return code:
38946 : * -2, if there is a point with class number
38947 : outside of [0..NClasses-1].
38948 : * -1, if incorrect parameters were passed
38949 : (NPoints<0, Restarts<1).
38950 : * 2, if the task has been solved.
38951 : Rep - training report.
38952 : OOBErrors - out-of-bag generalization error estimate
38953 :
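EXAMPLE (a minimal sketch; the ensemble creation call and the parameter
values are illustrative assumptions):
>
> MLPECreate1(NIn, NHid, NOut, 10, Ensemble)
> MLPEBaggingLM(Ensemble, XY, NPoints, 0.001, 3, Info, Rep, OOBErrors)
> if Info=2 then ...use OOBErrors as a generalization error estimate...
>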
38954 : -- ALGLIB --
38955 : Copyright 17.02.2009 by Bochkanov Sergey
38956 : *************************************************************************/
38957 0 : void mlpebagginglm(mlpensemble* ensemble,
38958 : /* Real */ ae_matrix* xy,
38959 : ae_int_t npoints,
38960 : double decay,
38961 : ae_int_t restarts,
38962 : ae_int_t* info,
38963 : mlpreport* rep,
38964 : mlpcvreport* ooberrors,
38965 : ae_state *_state)
38966 : {
38967 :
38968 0 : *info = 0;
38969 0 : _mlpreport_clear(rep);
38970 0 : _mlpcvreport_clear(ooberrors);
38971 :
38972 0 : mlptrain_mlpebagginginternal(ensemble, xy, npoints, decay, restarts, 0.0, 0, ae_true, info, rep, ooberrors, _state);
38973 0 : }
38974 :
38975 :
38976 : /*************************************************************************
38977 : Training neural networks ensemble using bootstrap aggregating (bagging).
38978 : L-BFGS algorithm is used as base training method.
38979 :
38980 : INPUT PARAMETERS:
38981 : Ensemble - model with initialized geometry
38982 : XY - training set
38983 : NPoints - training set size
38984 : Decay - weight decay coefficient, >=0.001
38985 : Restarts - restarts, >0.
38986 : WStep - stopping criterion, same as in MLPTrainLBFGS
38987 : MaxIts - stopping criterion, same as in MLPTrainLBFGS
38988 :
38989 : OUTPUT PARAMETERS:
38990 : Ensemble - trained model
38991 : Info - return code:
38992 : * -8, if both WStep=0 and MaxIts=0
38993 : * -2, if there is a point with class number
38994 : outside of [0..NClasses-1].
38995 : * -1, if incorrect parameters were passed
38996 : (NPoints<0, Restarts<1).
38997 : * 2, if the task has been solved.
38998 : Rep - training report.
38999 : OOBErrors - out-of-bag generalization error estimate
39000 :
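EXAMPLE (a minimal sketch; parameter values are illustrative assumptions.
Setting MaxIts=0 leaves only the WStep criterion active; WStep=0 together
with MaxIts=0 is rejected with Info=-8):
>
> MLPECreate1(NIn, NHid, NOut, 10, Ensemble)
> MLPEBaggingLBFGS(Ensemble, XY, NPoints, 0.001, 3, 0.001, 0, Info, Rep, OOBErrors)
>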
39001 : -- ALGLIB --
39002 : Copyright 17.02.2009 by Bochkanov Sergey
39003 : *************************************************************************/
39004 0 : void mlpebagginglbfgs(mlpensemble* ensemble,
39005 : /* Real */ ae_matrix* xy,
39006 : ae_int_t npoints,
39007 : double decay,
39008 : ae_int_t restarts,
39009 : double wstep,
39010 : ae_int_t maxits,
39011 : ae_int_t* info,
39012 : mlpreport* rep,
39013 : mlpcvreport* ooberrors,
39014 : ae_state *_state)
39015 : {
39016 :
39017 0 : *info = 0;
39018 0 : _mlpreport_clear(rep);
39019 0 : _mlpcvreport_clear(ooberrors);
39020 :
39021 0 : mlptrain_mlpebagginginternal(ensemble, xy, npoints, decay, restarts, wstep, maxits, ae_false, info, rep, ooberrors, _state);
39022 0 : }
39023 :
39024 :
39025 : /*************************************************************************
39026 : Training neural networks ensemble using early stopping.
39027 :
39028 : INPUT PARAMETERS:
39029 : Ensemble - model with initialized geometry
39030 : XY - training set
39031 : NPoints - training set size
39032 : Decay - weight decay coefficient, >=0.001
39033 : Restarts - restarts, >0.
39034 :
39035 : OUTPUT PARAMETERS:
39036 : Ensemble - trained model
39037 : Info - return code:
39038 : * -2, if there is a point with class number
39039 : outside of [0..NClasses-1].
39040 : * -1, if incorrect parameters were passed
39041 : (NPoints<2, Restarts<1 or Decay<0).
39042 : * 6, if the task has been solved.
39043 : Rep - training report.
39044 : OOBErrors - out-of-bag generalization error estimate
39045 :
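EXAMPLE (a minimal sketch; the creation call and parameter values are
illustrative assumptions):
>
> MLPECreate1(NIn, NHid, NOut, 10, Ensemble)
> MLPETrainES(Ensemble, XY, NPoints, 0.001, 3, Info, Rep)
> if Info=6 then ...ensemble is ready for MLPEProcess()...
>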
39046 : -- ALGLIB --
39047 : Copyright 10.03.2009 by Bochkanov Sergey
39048 : *************************************************************************/
39049 0 : void mlpetraines(mlpensemble* ensemble,
39050 : /* Real */ ae_matrix* xy,
39051 : ae_int_t npoints,
39052 : double decay,
39053 : ae_int_t restarts,
39054 : ae_int_t* info,
39055 : mlpreport* rep,
39056 : ae_state *_state)
39057 : {
39058 : ae_frame _frame_block;
39059 : ae_int_t i;
39060 : ae_int_t k;
39061 : ae_int_t ccount;
39062 : ae_int_t pcount;
39063 : ae_matrix trnxy;
39064 : ae_matrix valxy;
39065 : ae_int_t trnsize;
39066 : ae_int_t valsize;
39067 : ae_int_t tmpinfo;
39068 : mlpreport tmprep;
39069 : modelerrors moderr;
39070 : ae_int_t nin;
39071 : ae_int_t nout;
39072 : ae_int_t wcount;
39073 :
39074 0 : ae_frame_make(_state, &_frame_block);
39075 0 : memset(&trnxy, 0, sizeof(trnxy));
39076 0 : memset(&valxy, 0, sizeof(valxy));
39077 0 : memset(&tmprep, 0, sizeof(tmprep));
39078 0 : memset(&moderr, 0, sizeof(moderr));
39079 0 : *info = 0;
39080 0 : _mlpreport_clear(rep);
39081 0 : ae_matrix_init(&trnxy, 0, 0, DT_REAL, _state, ae_true);
39082 0 : ae_matrix_init(&valxy, 0, 0, DT_REAL, _state, ae_true);
39083 0 : _mlpreport_init(&tmprep, _state, ae_true);
39084 0 : _modelerrors_init(&moderr, _state, ae_true);
39085 :
39086 0 : nin = mlpgetinputscount(&ensemble->network, _state);
39087 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
39088 0 : wcount = mlpgetweightscount(&ensemble->network, _state);
39089 0 : if( (npoints<2||restarts<1)||ae_fp_less(decay,(double)(0)) )
39090 : {
39091 0 : *info = -1;
39092 0 : ae_frame_leave(_state);
39093 0 : return;
39094 : }
39095 0 : if( mlpissoftmax(&ensemble->network, _state) )
39096 : {
39097 0 : for(i=0; i<=npoints-1; i++)
39098 : {
39099 0 : if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
39100 : {
39101 0 : *info = -2;
39102 0 : ae_frame_leave(_state);
39103 0 : return;
39104 : }
39105 : }
39106 : }
39107 0 : *info = 6;
39108 :
39109 : /*
39110 : * allocate
39111 : */
39112 0 : if( mlpissoftmax(&ensemble->network, _state) )
39113 : {
39114 0 : ccount = nin+1;
39115 0 : pcount = nin;
39116 : }
39117 : else
39118 : {
39119 0 : ccount = nin+nout;
39120 0 : pcount = nin+nout;
39121 : }
39122 0 : ae_matrix_set_length(&trnxy, npoints, ccount, _state);
39123 0 : ae_matrix_set_length(&valxy, npoints, ccount, _state);
39124 0 : rep->ngrad = 0;
39125 0 : rep->nhess = 0;
39126 0 : rep->ncholesky = 0;
39127 :
39128 : /*
39129 : * train networks
39130 : */
39131 0 : for(k=0; k<=ensemble->ensemblesize-1; k++)
39132 : {
39133 :
39134 : /*
39135 : * Split set: random 2:1 train/validation split, retried until both parts are non-empty
39136 : */
39137 0 : do
39138 : {
39139 0 : trnsize = 0;
39140 0 : valsize = 0;
39141 0 : for(i=0; i<=npoints-1; i++)
39142 : {
39143 0 : if( ae_fp_less(ae_randomreal(_state),0.66) )
39144 : {
39145 :
39146 : /*
39147 : * Assign sample to training set
39148 : */
39149 0 : ae_v_move(&trnxy.ptr.pp_double[trnsize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,ccount-1));
39150 0 : trnsize = trnsize+1;
39151 : }
39152 : else
39153 : {
39154 :
39155 : /*
39156 : * Assign sample to validation set
39157 : */
39158 0 : ae_v_move(&valxy.ptr.pp_double[valsize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,ccount-1));
39159 0 : valsize = valsize+1;
39160 : }
39161 : }
39162 : }
39163 0 : while(!(trnsize!=0&&valsize!=0));
39164 :
39165 : /*
39166 : * Train
39167 : */
39168 0 : mlptraines(&ensemble->network, &trnxy, trnsize, &valxy, valsize, decay, restarts, &tmpinfo, &tmprep, _state);
39169 0 : if( tmpinfo<0 )
39170 : {
39171 0 : *info = tmpinfo;
39172 0 : ae_frame_leave(_state);
39173 0 : return;
39174 : }
39175 :
39176 : /*
39177 : * save results
39178 : */
39179 0 : ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &ensemble->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
39180 0 : ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcount], 1, &ensemble->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
39181 0 : ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcount], 1, &ensemble->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
39182 0 : rep->ngrad = rep->ngrad+tmprep.ngrad;
39183 0 : rep->nhess = rep->nhess+tmprep.nhess;
39184 0 : rep->ncholesky = rep->ncholesky+tmprep.ncholesky;
39185 : }
39186 0 : mlpeallerrorsx(ensemble, xy, &ensemble->network.dummysxy, npoints, 0, &ensemble->network.dummyidx, 0, npoints, 0, &ensemble->network.buf, &moderr, _state);
39187 0 : rep->relclserror = moderr.relclserror;
39188 0 : rep->avgce = moderr.avgce;
39189 0 : rep->rmserror = moderr.rmserror;
39190 0 : rep->avgerror = moderr.avgerror;
39191 0 : rep->avgrelerror = moderr.avgrelerror;
39192 0 : ae_frame_leave(_state);
39193 : }
39194 :
39195 :
39196 : /*************************************************************************
39197 : This function trains the neural network ensemble passed to it using the
39198 : current dataset and the early stopping training algorithm. Each early
39199 : stopping round performs NRestarts random restarts (thus, EnsembleSize*
39200 : NRestarts training rounds are performed in total).
39201 :
39202 : ! COMMERCIAL EDITION OF ALGLIB:
39203 : !
39204 : ! Commercial Edition of ALGLIB includes following important improvements
39205 : ! of this function:
39206 : ! * high-performance native backend with same C# interface (C# version)
39207 : ! * multithreading support (C++ and C# versions)
39208 : !
39209 : ! We recommend you to read 'Working with commercial version' section of
39210 : ! ALGLIB Reference Manual in order to find out how to use performance-
39211 : ! related features provided by commercial edition of ALGLIB.
39212 :
39213 : INPUT PARAMETERS:
39214 : S - trainer object;
39215 : Ensemble - neural network ensemble. It must have the same number
39216 : of inputs and outputs/classes as was specified during
39217 : creation of the trainer object.
39218 : NRestarts - number of restarts, >=0:
39219 : * NRestarts>0 means that the specified number of
39220 : random restarts is performed during each ES round;
39221 : * NRestarts=0 is silently replaced by 1.
39222 :
39223 : OUTPUT PARAMETERS:
39224 : Ensemble - trained ensemble;
39225 : Rep - it contains all types of errors.
39226 :
39227 : NOTE: this training method uses BOTH early stopping and weight decay! So,
39228 : you should select weight decay before starting training just as you
39229 : select it before training "conventional" networks.
39230 :
39231 : NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
39232 : or a single-point dataset was passed, the ensemble is filled with
39233 : zero values.
39234 :
39235 : NOTE: this method uses the sum-of-squares error function for training.
39236 :
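EXAMPLE (a minimal sketch for a classification task; creation calls and
sizes are illustrative assumptions):
>
> MLPCreateTrainerCls(NIn, NClasses, Trainer)
> MLPSetDataset(Trainer, XY, NPoints)
> MLPECreateC1(NIn, NHid, NClasses, 10, Ensemble)
> MLPTrainEnsembleES(Trainer, Ensemble, 2, Rep)
>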
39237 : -- ALGLIB --
39238 : Copyright 22.08.2012 by Bochkanov Sergey
39239 : *************************************************************************/
39240 0 : void mlptrainensemblees(mlptrainer* s,
39241 : mlpensemble* ensemble,
39242 : ae_int_t nrestarts,
39243 : mlpreport* rep,
39244 : ae_state *_state)
39245 : {
39246 : ae_frame _frame_block;
39247 : ae_int_t nin;
39248 : ae_int_t nout;
39249 : ae_int_t ntype;
39250 : ae_int_t ttype;
39251 : ae_shared_pool esessions;
39252 : sinteger sgrad;
39253 : modelerrors tmprep;
39254 :
39255 0 : ae_frame_make(_state, &_frame_block);
39256 0 : memset(&esessions, 0, sizeof(esessions));
39257 0 : memset(&sgrad, 0, sizeof(sgrad));
39258 0 : memset(&tmprep, 0, sizeof(tmprep));
39259 0 : _mlpreport_clear(rep);
39260 0 : ae_shared_pool_init(&esessions, _state, ae_true);
39261 0 : _sinteger_init(&sgrad, _state, ae_true);
39262 0 : _modelerrors_init(&tmprep, _state, ae_true);
39263 :
39264 0 : ae_assert(s->npoints>=0, "MLPTrainEnsembleES: parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
39265 0 : if( !mlpeissoftmax(ensemble, _state) )
39266 : {
39267 0 : ntype = 0;
39268 : }
39269 : else
39270 : {
39271 0 : ntype = 1;
39272 : }
39273 0 : if( s->rcpar )
39274 : {
39275 0 : ttype = 0;
39276 : }
39277 : else
39278 : {
39279 0 : ttype = 1;
39280 : }
39281 0 : ae_assert(ntype==ttype, "MLPTrainEnsembleES: internal error - type of input network is not similar to network type in trainer object", _state);
39282 0 : nin = mlpgetinputscount(&ensemble->network, _state);
39283 0 : ae_assert(s->nin==nin, "MLPTrainEnsembleES: number of inputs in trainer is not equal to number of inputs in ensemble network", _state);
39284 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
39285 0 : ae_assert(s->nout==nout, "MLPTrainEnsembleES: number of outputs in trainer is not equal to number of outputs in ensemble network", _state);
39286 0 : ae_assert(nrestarts>=0, "MLPTrainEnsembleES: NRestarts<0.", _state);
39287 :
39288 : /*
39289 : * Initialize parameter Rep
39290 : */
39291 0 : rep->relclserror = (double)(0);
39292 0 : rep->avgce = (double)(0);
39293 0 : rep->rmserror = (double)(0);
39294 0 : rep->avgerror = (double)(0);
39295 0 : rep->avgrelerror = (double)(0);
39296 0 : rep->ngrad = 0;
39297 0 : rep->nhess = 0;
39298 0 : rep->ncholesky = 0;
39299 :
39300 : /*
39301 : * Allocate
39302 : */
39303 0 : ivectorsetlengthatleast(&s->subset, s->npoints, _state);
39304 0 : ivectorsetlengthatleast(&s->valsubset, s->npoints, _state);
39305 :
39306 : /*
39307 : * Start training
39308 : *
39309 : * NOTE: ESessions is not initialized because MLPTrainEnsembleX
39310 : * needs an uninitialized pool.
39311 : */
39312 0 : sgrad.val = 0;
39313 0 : mlptrain_mlptrainensemblex(s, ensemble, 0, ensemble->ensemblesize, nrestarts, 0, &sgrad, ae_true, &esessions, _state);
39314 0 : rep->ngrad = sgrad.val;
39315 :
39316 : /*
39317 : * Calculate errors.
39318 : */
39319 0 : if( s->datatype==0 )
39320 : {
39321 0 : mlpeallerrorsx(ensemble, &s->densexy, &s->sparsexy, s->npoints, 0, &ensemble->network.dummyidx, 0, s->npoints, 0, &ensemble->network.buf, &tmprep, _state);
39322 : }
39323 0 : if( s->datatype==1 )
39324 : {
39325 0 : mlpeallerrorsx(ensemble, &s->densexy, &s->sparsexy, s->npoints, 1, &ensemble->network.dummyidx, 0, s->npoints, 0, &ensemble->network.buf, &tmprep, _state);
39326 : }
39327 0 : rep->relclserror = tmprep.relclserror;
39328 0 : rep->avgce = tmprep.avgce;
39329 0 : rep->rmserror = tmprep.rmserror;
39330 0 : rep->avgerror = tmprep.avgerror;
39331 0 : rep->avgrelerror = tmprep.avgrelerror;
39332 0 : ae_frame_leave(_state);
39333 0 : }
39334 :
39335 :
39336 : /*************************************************************************
39337 : Internal cross-validation subroutine
39338 : *************************************************************************/
39339 0 : static void mlptrain_mlpkfoldcvgeneral(multilayerperceptron* n,
39340 : /* Real */ ae_matrix* xy,
39341 : ae_int_t npoints,
39342 : double decay,
39343 : ae_int_t restarts,
39344 : ae_int_t foldscount,
39345 : ae_bool lmalgorithm,
39346 : double wstep,
39347 : ae_int_t maxits,
39348 : ae_int_t* info,
39349 : mlpreport* rep,
39350 : mlpcvreport* cvrep,
39351 : ae_state *_state)
39352 : {
39353 : ae_frame _frame_block;
39354 : ae_int_t i;
39355 : ae_int_t fold;
39356 : ae_int_t j;
39357 : ae_int_t k;
39358 : multilayerperceptron network;
39359 : ae_int_t nin;
39360 : ae_int_t nout;
39361 : ae_int_t rowlen;
39362 : ae_int_t wcount;
39363 : ae_int_t nclasses;
39364 : ae_int_t tssize;
39365 : ae_int_t cvssize;
39366 : ae_matrix cvset;
39367 : ae_matrix testset;
39368 : ae_vector folds;
39369 : ae_int_t relcnt;
39370 : mlpreport internalrep;
39371 : ae_vector x;
39372 : ae_vector y;
39373 :
39374 0 : ae_frame_make(_state, &_frame_block);
39375 0 : memset(&network, 0, sizeof(network));
39376 0 : memset(&cvset, 0, sizeof(cvset));
39377 0 : memset(&testset, 0, sizeof(testset));
39378 0 : memset(&folds, 0, sizeof(folds));
39379 0 : memset(&internalrep, 0, sizeof(internalrep));
39380 0 : memset(&x, 0, sizeof(x));
39381 0 : memset(&y, 0, sizeof(y));
39382 0 : *info = 0;
39383 0 : _mlpreport_clear(rep);
39384 0 : _mlpcvreport_clear(cvrep);
39385 0 : _multilayerperceptron_init(&network, _state, ae_true);
39386 0 : ae_matrix_init(&cvset, 0, 0, DT_REAL, _state, ae_true);
39387 0 : ae_matrix_init(&testset, 0, 0, DT_REAL, _state, ae_true);
39388 0 : ae_vector_init(&folds, 0, DT_INT, _state, ae_true);
39389 0 : _mlpreport_init(&internalrep, _state, ae_true);
39390 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
39391 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
39392 :
39393 :
39394 : /*
39395 : * Read network geometry, test parameters
39396 : */
39397 0 : mlpproperties(n, &nin, &nout, &wcount, _state);
39398 0 : if( mlpissoftmax(n, _state) )
39399 : {
39400 0 : nclasses = nout;
39401 0 : rowlen = nin+1;
39402 : }
39403 : else
39404 : {
39405 0 : nclasses = -nout;
39406 0 : rowlen = nin+nout;
39407 : }
39408 0 : if( (npoints<=0||foldscount<2)||foldscount>npoints )
39409 : {
39410 0 : *info = -1;
39411 0 : ae_frame_leave(_state);
39412 0 : return;
39413 : }
39414 0 : mlpcopy(n, &network, _state);
39415 :
39416 : /*
39417 : * K-fold cross-validation.
39418 : * First, estimate the generalization error
39419 : */
39420 0 : ae_matrix_set_length(&testset, npoints-1+1, rowlen-1+1, _state);
39421 0 : ae_matrix_set_length(&cvset, npoints-1+1, rowlen-1+1, _state);
39422 0 : ae_vector_set_length(&x, nin-1+1, _state);
39423 0 : ae_vector_set_length(&y, nout-1+1, _state);
39424 0 : mlptrain_mlpkfoldsplit(xy, npoints, nclasses, foldscount, ae_false, &folds, _state);
39425 0 : cvrep->relclserror = (double)(0);
39426 0 : cvrep->avgce = (double)(0);
39427 0 : cvrep->rmserror = (double)(0);
39428 0 : cvrep->avgerror = (double)(0);
39429 0 : cvrep->avgrelerror = (double)(0);
39430 0 : rep->ngrad = 0;
39431 0 : rep->nhess = 0;
39432 0 : rep->ncholesky = 0;
39433 0 : relcnt = 0;
39434 0 : for(fold=0; fold<=foldscount-1; fold++)
39435 : {
39436 :
39437 : /*
39438 : * Separate set
39439 : */
39440 0 : tssize = 0;
39441 0 : cvssize = 0;
39442 0 : for(i=0; i<=npoints-1; i++)
39443 : {
39444 0 : if( folds.ptr.p_int[i]==fold )
39445 : {
39446 0 : ae_v_move(&testset.ptr.pp_double[tssize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,rowlen-1));
39447 0 : tssize = tssize+1;
39448 : }
39449 : else
39450 : {
39451 0 : ae_v_move(&cvset.ptr.pp_double[cvssize][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,rowlen-1));
39452 0 : cvssize = cvssize+1;
39453 : }
39454 : }
39455 :
39456 : /*
39457 : * Train on CV training set
39458 : */
39459 0 : if( lmalgorithm )
39460 : {
39461 0 : mlptrainlm(&network, &cvset, cvssize, decay, restarts, info, &internalrep, _state);
39462 : }
39463 : else
39464 : {
39465 0 : mlptrainlbfgs(&network, &cvset, cvssize, decay, restarts, wstep, maxits, info, &internalrep, _state);
39466 : }
39467 0 : if( *info<0 )
39468 : {
39469 0 : cvrep->relclserror = (double)(0);
39470 0 : cvrep->avgce = (double)(0);
39471 0 : cvrep->rmserror = (double)(0);
39472 0 : cvrep->avgerror = (double)(0);
39473 0 : cvrep->avgrelerror = (double)(0);
39474 0 : ae_frame_leave(_state);
39475 0 : return;
39476 : }
39477 0 : rep->ngrad = rep->ngrad+internalrep.ngrad;
39478 0 : rep->nhess = rep->nhess+internalrep.nhess;
39479 0 : rep->ncholesky = rep->ncholesky+internalrep.ncholesky;
39480 :
39481 : /*
39482 : * Estimate error using CV test set
39483 : */
39484 0 : if( mlpissoftmax(&network, _state) )
39485 : {
39486 :
39487 : /*
39488 : * classification-only code
39489 : */
39490 0 : cvrep->relclserror = cvrep->relclserror+mlpclserror(&network, &testset, tssize, _state);
39491 0 : cvrep->avgce = cvrep->avgce+mlperrorn(&network, &testset, tssize, _state);
39492 : }
39493 0 : for(i=0; i<=tssize-1; i++)
39494 : {
39495 0 : ae_v_move(&x.ptr.p_double[0], 1, &testset.ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
39496 0 : mlpprocess(&network, &x, &y, _state);
39497 0 : if( mlpissoftmax(&network, _state) )
39498 : {
39499 :
39500 : /*
39501 : * Classification-specific code
39502 : */
39503 0 : k = ae_round(testset.ptr.pp_double[i][nin], _state);
39504 0 : for(j=0; j<=nout-1; j++)
39505 : {
39506 0 : if( j==k )
39507 : {
39508 0 : cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j]-1, _state);
39509 0 : cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j]-1, _state);
39510 0 : cvrep->avgrelerror = cvrep->avgrelerror+ae_fabs(y.ptr.p_double[j]-1, _state);
39511 0 : relcnt = relcnt+1;
39512 : }
39513 : else
39514 : {
39515 0 : cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j], _state);
39516 0 : cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j], _state);
39517 : }
39518 : }
39519 : }
39520 : else
39521 : {
39522 :
39523 : /*
39524 : * Regression-specific code
39525 : */
39526 0 : for(j=0; j<=nout-1; j++)
39527 : {
39528 0 : cvrep->rmserror = cvrep->rmserror+ae_sqr(y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j], _state);
39529 0 : cvrep->avgerror = cvrep->avgerror+ae_fabs(y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j], _state);
39530 0 : if( ae_fp_neq(testset.ptr.pp_double[i][nin+j],(double)(0)) )
39531 : {
39532 0 : cvrep->avgrelerror = cvrep->avgrelerror+ae_fabs((y.ptr.p_double[j]-testset.ptr.pp_double[i][nin+j])/testset.ptr.pp_double[i][nin+j], _state);
39533 0 : relcnt = relcnt+1;
39534 : }
39535 : }
39536 : }
39537 : }
39538 : }
39539 0 : if( mlpissoftmax(&network, _state) )
39540 : {
39541 0 : cvrep->relclserror = cvrep->relclserror/npoints;
39542 0 : cvrep->avgce = cvrep->avgce/(ae_log((double)(2), _state)*npoints);
39543 : }
39544 0 : cvrep->rmserror = ae_sqrt(cvrep->rmserror/(npoints*nout), _state);
39545 0 : cvrep->avgerror = cvrep->avgerror/(npoints*nout);
39546 0 : if( relcnt>0 )
39547 : {
39548 0 : cvrep->avgrelerror = cvrep->avgrelerror/relcnt;
39549 : }
39550 0 : *info = 1;
39551 0 : ae_frame_leave(_state);
39552 : }
39553 :
39554 :
39555 : /*************************************************************************
39556 : Subroutine prepares K-fold split of the training set.
39557 :
39558 : NOTES:
39559 : "NClasses>0" means that we have classification task.
39560 : "NClasses<0" means regression task with -NClasses real outputs.
39561 : *************************************************************************/
39562 0 : static void mlptrain_mlpkfoldsplit(/* Real */ ae_matrix* xy,
39563 : ae_int_t npoints,
39564 : ae_int_t nclasses,
39565 : ae_int_t foldscount,
39566 : ae_bool stratifiedsplits,
39567 : /* Integer */ ae_vector* folds,
39568 : ae_state *_state)
39569 : {
39570 : ae_frame _frame_block;
39571 : ae_int_t i;
39572 : ae_int_t j;
39573 : ae_int_t k;
39574 : hqrndstate rs;
39575 :
39576 0 : ae_frame_make(_state, &_frame_block);
39577 0 : memset(&rs, 0, sizeof(rs));
39578 0 : ae_vector_clear(folds);
39579 0 : _hqrndstate_init(&rs, _state, ae_true);
39580 :
39581 :
39582 : /*
39583 : * test parameters
39584 : */
39585 0 : ae_assert(npoints>0, "MLPKFoldSplit: wrong NPoints!", _state);
39586 0 : ae_assert(nclasses>1||nclasses<0, "MLPKFoldSplit: wrong NClasses!", _state);
39587 0 : ae_assert(foldscount>=2&&foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!", _state);
39588 0 : ae_assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!", _state);
39589 :
39590 : /*
39591 : * Folds
39592 : */
39593 0 : hqrndrandomize(&rs, _state);
39594 0 : ae_vector_set_length(folds, npoints-1+1, _state);
39595 0 : for(i=0; i<=npoints-1; i++)
39596 : {
39597 0 : folds->ptr.p_int[i] = i*foldscount/npoints;
39598 : }
39599 0 : for(i=0; i<=npoints-2; i++)
39600 : {
39601 0 : j = i+hqrnduniformi(&rs, npoints-i, _state);
39602 0 : if( j!=i )
39603 : {
39604 0 : k = folds->ptr.p_int[i];
39605 0 : folds->ptr.p_int[i] = folds->ptr.p_int[j];
39606 0 : folds->ptr.p_int[j] = k;
39607 : }
39608 : }
39609 0 : ae_frame_leave(_state);
39610 0 : }
39611 :
39612 :
39613 : /*************************************************************************
39614 : Internal subroutine for parallelization of the MLPKFoldCV function.
39615 :
39616 :
39617 : INPUT PARAMETERS:
39618 : S - trainer object;
39619 : RowSize - row size (either NIn+NOut or NIn+1);
39620 : NRestarts - number of restarts (>=0);
39621 : Folds - array which maps each point to its fold index;
39622 : Fold - index of the first fold to process (>=0);
39623 : DFold - index past the last fold to process (>=Fold+1);
39624 : CVY - matrix which stores the outputs returned by the network
39625 : trained on the I-th cross-validation subset.
39626 : It has to be preallocated.
39627 : PoolDataCV- pool of temporaries used for parallelization.
39628 : WCount - number of weights in the network, used to make decisions
39629 : on parallelization.
39630 :
39631 : NOTE: there are no checks of parameter correctness.
39632 :
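EXAMPLE (assumed FoldsCount=4, traced from the recursion below): a root
call with Fold=0, DFold=4 splits into (0,2) and (2,4), then into the
single-fold leaves (0,1), (1,2), (2,3), (3,4); each leaf trains on the
points outside its fold and writes CVY rows for the points inside it.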
39633 : -- ALGLIB --
39634 : Copyright 25.09.2012 by Bochkanov Sergey
39635 : *************************************************************************/
39636 0 : static void mlptrain_mthreadcv(mlptrainer* s,
39637 : ae_int_t rowsize,
39638 : ae_int_t nrestarts,
39639 : /* Integer */ ae_vector* folds,
39640 : ae_int_t fold,
39641 : ae_int_t dfold,
39642 : /* Real */ ae_matrix* cvy,
39643 : ae_shared_pool* pooldatacv,
39644 : ae_int_t wcount,
39645 : ae_state *_state)
39646 : {
39647 : ae_frame _frame_block;
39648 : mlpparallelizationcv *datacv;
39649 : ae_smart_ptr _datacv;
39650 : ae_int_t i;
39651 :
39652 0 : ae_frame_make(_state, &_frame_block);
39653 0 : memset(&_datacv, 0, sizeof(_datacv));
39654 0 : ae_smart_ptr_init(&_datacv, (void**)&datacv, _state, ae_true);
39655 :
39656 0 : if( fold==dfold-1 )
39657 : {
39658 :
39659 : /*
39660 : * Separate set
39661 : */
39662 0 : ae_shared_pool_retrieve(pooldatacv, &_datacv, _state);
39663 0 : datacv->subsetsize = 0;
39664 0 : for(i=0; i<=s->npoints-1; i++)
39665 : {
39666 0 : if( folds->ptr.p_int[i]!=fold )
39667 : {
39668 0 : datacv->subset.ptr.p_int[datacv->subsetsize] = i;
39669 0 : datacv->subsetsize = datacv->subsetsize+1;
39670 : }
39671 : }
39672 :
39673 : /*
39674 : * Train on CV training set
39675 : */
39676 0 : mlptrain_mlptrainnetworkx(s, nrestarts, -1, &datacv->subset, datacv->subsetsize, &datacv->subset, 0, &datacv->network, &datacv->rep, ae_true, &datacv->trnpool, _state);
39677 0 : datacv->ngrad = datacv->ngrad+datacv->rep.ngrad;
39678 :
39679 : /*
39680 : * Estimate error using CV test set
39681 : */
39682 0 : for(i=0; i<=s->npoints-1; i++)
39683 : {
39684 0 : if( folds->ptr.p_int[i]==fold )
39685 : {
39686 0 : if( s->datatype==0 )
39687 : {
39688 0 : ae_v_move(&datacv->xyrow.ptr.p_double[0], 1, &s->densexy.ptr.pp_double[i][0], 1, ae_v_len(0,rowsize-1));
39689 : }
39690 0 : if( s->datatype==1 )
39691 : {
39692 0 : sparsegetrow(&s->sparsexy, i, &datacv->xyrow, _state);
39693 : }
39694 0 : mlpprocess(&datacv->network, &datacv->xyrow, &datacv->y, _state);
39695 0 : ae_v_move(&cvy->ptr.pp_double[i][0], 1, &datacv->y.ptr.p_double[0], 1, ae_v_len(0,s->nout-1));
39696 : }
39697 : }
39698 0 : ae_shared_pool_recycle(pooldatacv, &_datacv, _state);
39699 : }
39700 : else
39701 : {
39702 0 : ae_assert(fold<dfold-1, "MThreadCV: internal error(Fold>DFold-1).", _state);
39703 :
39704 : /*
39705 : * We expect that the minimum number of iterations before convergence is 100.
39706 : * Hence our approach to evaluating task complexity.
39707 : */
39708 0 : if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
39709 : {
39710 0 : if( _trypexec_mlptrain_mthreadcv(s,rowsize,nrestarts,folds,fold,dfold,cvy,pooldatacv,wcount, _state) )
39711 : {
39712 0 : ae_frame_leave(_state);
39713 0 : return;
39714 : }
39715 : }
39716 :
39717 : /*
39718 : * Split task
39719 : */
39720 0 : mlptrain_mthreadcv(s, rowsize, nrestarts, folds, fold, (fold+dfold)/2, cvy, pooldatacv, wcount, _state);
39721 0 : mlptrain_mthreadcv(s, rowsize, nrestarts, folds, (fold+dfold)/2, dfold, cvy, pooldatacv, wcount, _state);
39722 : }
39723 0 : ae_frame_leave(_state);
39724 : }
39725 :
39726 :
39727 : /*************************************************************************
39728 : Serial stub for GPL edition.
39729 : *************************************************************************/
39730 0 : ae_bool _trypexec_mlptrain_mthreadcv(mlptrainer* s,
39731 : ae_int_t rowsize,
39732 : ae_int_t nrestarts,
39733 : /* Integer */ ae_vector* folds,
39734 : ae_int_t fold,
39735 : ae_int_t dfold,
39736 : /* Real */ ae_matrix* cvy,
39737 : ae_shared_pool* pooldatacv,
39738 : ae_int_t wcount,
39739 : ae_state *_state)
39740 : {
39741 0 : return ae_false;
39742 : }
39743 :
39744 :
39745 : /*************************************************************************
39746 : This function trains the neural network passed to it, using the current
39747 : dataset (the one which was passed to MLPSetDataset() or
39748 : MLPSetSparseDataset()) and the current training settings. Training from
39749 : NRestarts random starting positions is performed, and the best network
39750 : is chosen.
39751 :
39752 : This function is intended to be used internally. It may be used in
39753 : several settings:
39754 : * training with ValSubsetSize=0 corresponds to "normal" training with
39755 : termination criteria based on S.MaxIts (step count) and S.WStep (step
39756 : size). The training sample is given by TrnSubset/TrnSubsetSize.
39757 : * training with ValSubsetSize>0 corresponds to early stopping training
39758 : with additional MaxIts/WStep stopping criteria. The training sample is
39759 : given by TrnSubset/TrnSubsetSize, the validation sample by ValSubset/
39760 : ValSubsetSize.
39760 :
39761 : -- ALGLIB --
39762 : Copyright 13.08.2012 by Bochkanov Sergey
39763 : *************************************************************************/
39764 0 : static void mlptrain_mlptrainnetworkx(mlptrainer* s,
39765 : ae_int_t nrestarts,
39766 : ae_int_t algokind,
39767 : /* Integer */ ae_vector* trnsubset,
39768 : ae_int_t trnsubsetsize,
39769 : /* Integer */ ae_vector* valsubset,
39770 : ae_int_t valsubsetsize,
39771 : multilayerperceptron* network,
39772 : mlpreport* rep,
39773 : ae_bool isrootcall,
39774 : ae_shared_pool* sessions,
39775 : ae_state *_state)
39776 : {
39777 : ae_frame _frame_block;
39778 : modelerrors modrep;
39779 : double eval;
39780 : double ebest;
39781 : ae_int_t ngradbatch;
39782 : ae_int_t nin;
39783 : ae_int_t nout;
39784 : ae_int_t wcount;
39785 : ae_int_t pcount;
39786 : ae_int_t itbest;
39787 : ae_int_t itcnt;
39788 : ae_int_t ntype;
39789 : ae_int_t ttype;
39790 : ae_bool rndstart;
39791 : ae_int_t i;
39792 : ae_int_t nr0;
39793 : ae_int_t nr1;
39794 : mlpreport rep0;
39795 : mlpreport rep1;
39796 : ae_bool randomizenetwork;
39797 : double bestrmserror;
39798 : smlptrnsession *psession;
39799 : ae_smart_ptr _psession;
39800 :
39801 0 : ae_frame_make(_state, &_frame_block);
39802 0 : memset(&modrep, 0, sizeof(modrep));
39803 0 : memset(&rep0, 0, sizeof(rep0));
39804 0 : memset(&rep1, 0, sizeof(rep1));
39805 0 : memset(&_psession, 0, sizeof(_psession));
39806 0 : _modelerrors_init(&modrep, _state, ae_true);
39807 0 : _mlpreport_init(&rep0, _state, ae_true);
39808 0 : _mlpreport_init(&rep1, _state, ae_true);
39809 0 : ae_smart_ptr_init(&_psession, (void**)&psession, _state, ae_true);
39810 :
39811 0 : mlpproperties(network, &nin, &nout, &wcount, _state);
39812 :
39813 : /*
39814 : * Process root call
39815 : */
39816 0 : if( isrootcall )
39817 : {
39818 :
39819 : /*
39820 : * Try parallelization
39821 : * We expect that the minimum number of iterations before convergence is 100.
39822 : * Hence our approach to evaluating task complexity.
39823 : */
39824 0 : if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
39825 : {
39826 0 : if( _trypexec_mlptrain_mlptrainnetworkx(s,nrestarts,algokind,trnsubset,trnsubsetsize,valsubset,valsubsetsize,network,rep,isrootcall,sessions, _state) )
39827 : {
39828 0 : ae_frame_leave(_state);
39829 0 : return;
39830 : }
39831 : }
39832 :
39833 : /*
39834 : * Check correctness of parameters
39835 : */
39836 0 : ae_assert(algokind==0||algokind==-1, "MLPTrainNetworkX: unexpected AlgoKind", _state);
39837 0 : ae_assert(s->npoints>=0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
39838 0 : if( s->rcpar )
39839 : {
39840 0 : ttype = 0;
39841 : }
39842 : else
39843 : {
39844 0 : ttype = 1;
39845 : }
39846 0 : if( !mlpissoftmax(network, _state) )
39847 : {
39848 0 : ntype = 0;
39849 : }
39850 : else
39851 : {
39852 0 : ntype = 1;
39853 : }
39854 0 : ae_assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object", _state);
39855 0 : ae_assert(s->nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network.", _state);
39856 0 : ae_assert(s->nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network.", _state);
39857 0 : ae_assert(nrestarts>=0, "MLPTrainNetworkX: internal error - NRestarts<0.", _state);
39858 0 : ae_assert(trnsubset->cnt>=trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)", _state);
39859 0 : for(i=0; i<=trnsubsetsize-1; i++)
39860 : {
39861 0 : ae_assert(trnsubset->ptr.p_int[i]>=0&&trnsubset->ptr.p_int[i]<=s->npoints-1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)", _state);
39862 : }
39863 0 : ae_assert(valsubset->cnt>=valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)", _state);
39864 0 : for(i=0; i<=valsubsetsize-1; i++)
39865 : {
39866 0 : ae_assert(valsubset->ptr.p_int[i]>=0&&valsubset->ptr.p_int[i]<=s->npoints-1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)", _state);
39867 : }
39868 :
39869 : /*
39870 : * Train
39871 : */
39872 0 : randomizenetwork = nrestarts>0;
39873 0 : mlptrain_initmlptrnsessions(network, randomizenetwork, s, sessions, _state);
39874 0 : mlptrain_mlptrainnetworkx(s, nrestarts, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep, ae_false, sessions, _state);
39875 :
39876 : /*
39877 : * Choose best network
39878 : */
39879 0 : bestrmserror = ae_maxrealnumber;
39880 0 : ae_shared_pool_first_recycled(sessions, &_psession, _state);
39881 0 : while(psession!=NULL)
39882 : {
39883 0 : if( ae_fp_less(psession->bestrmserror,bestrmserror) )
39884 : {
39885 0 : mlpimporttunableparameters(network, &psession->bestparameters, _state);
39886 0 : bestrmserror = psession->bestrmserror;
39887 : }
39888 0 : ae_shared_pool_next_recycled(sessions, &_psession, _state);
39889 : }
39890 :
39891 : /*
39892 : * Calculate errors
39893 : */
39894 0 : if( s->datatype==0 )
39895 : {
39896 0 : mlpallerrorssubset(network, &s->densexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
39897 : }
39898 0 : if( s->datatype==1 )
39899 : {
39900 0 : mlpallerrorssparsesubset(network, &s->sparsexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
39901 : }
39902 0 : rep->relclserror = modrep.relclserror;
39903 0 : rep->avgce = modrep.avgce;
39904 0 : rep->rmserror = modrep.rmserror;
39905 0 : rep->avgerror = modrep.avgerror;
39906 0 : rep->avgrelerror = modrep.avgrelerror;
39907 :
39908 : /*
39909 : * Done
39910 : */
39911 0 : ae_frame_leave(_state);
39912 0 : return;
39913 : }
39914 :
39915 : /*
39916 : * Split problem, if we have more than 1 restart
39917 : */
39918 0 : if( nrestarts>=2 )
39919 : {
39920 :
39921 : /*
39922 : * Divide problem with NRestarts into two: NR0 and NR1.
39923 : */
39924 0 : nr0 = nrestarts/2;
39925 0 : nr1 = nrestarts-nr0;
39926 0 : mlptrain_mlptrainnetworkx(s, nr0, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, &rep0, ae_false, sessions, _state);
39927 0 : mlptrain_mlptrainnetworkx(s, nr1, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, &rep1, ae_false, sessions, _state);
39928 :
39929 : /*
39930 : * Aggregate results
39931 : */
39932 0 : rep->ngrad = rep0.ngrad+rep1.ngrad;
39933 0 : rep->nhess = rep0.nhess+rep1.nhess;
39934 0 : rep->ncholesky = rep0.ncholesky+rep1.ncholesky;
39935 :
39936 : /*
39937 : * Done :)
39938 : */
39939 0 : ae_frame_leave(_state);
39940 0 : return;
39941 : }
39942 :
39943 : /*
39944 : * Execution with NRestarts=1 or NRestarts=0:
39945 : * * NRestarts=1 means that network is restarted from random position
39946 : * * NRestarts=0 means that network is not randomized
39947 : */
39948 0 : ae_assert(nrestarts==0||nrestarts==1, "MLPTrainNetworkX: internal error", _state);
39949 0 : rep->ngrad = 0;
39950 0 : rep->nhess = 0;
39951 0 : rep->ncholesky = 0;
39952 0 : ae_shared_pool_retrieve(sessions, &_psession, _state);
39953 0 : if( ((s->datatype==0||s->datatype==1)&&s->npoints>0)&&trnsubsetsize!=0 )
39954 : {
39955 :
39956 : /*
39957 : * Train the network using a combination of early stopping and step-size/
39958 : * step-count based criteria. The network state with the best validation
39959 : * set error is stored in WBuf0. When the validation set is empty, the
39960 : * most recent network state is stored.
39961 : */
39962 0 : rndstart = nrestarts!=0;
39963 0 : ngradbatch = 0;
39964 0 : eval = (double)(0);
39965 0 : ebest = (double)(0);
39966 0 : itbest = 0;
39967 0 : itcnt = 0;
39968 0 : mlptrain_mlpstarttrainingx(s, rndstart, algokind, trnsubset, trnsubsetsize, psession, _state);
39969 0 : if( s->datatype==0 )
39970 : {
39971 0 : ebest = mlperrorsubset(&psession->network, &s->densexy, s->npoints, valsubset, valsubsetsize, _state);
39972 : }
39973 0 : if( s->datatype==1 )
39974 : {
39975 0 : ebest = mlperrorsparsesubset(&psession->network, &s->sparsexy, s->npoints, valsubset, valsubsetsize, _state);
39976 : }
39977 0 : ae_v_move(&psession->wbuf0.ptr.p_double[0], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
39978 0 : while(mlptrain_mlpcontinuetrainingx(s, trnsubset, trnsubsetsize, &ngradbatch, psession, _state))
39979 : {
39980 0 : if( s->datatype==0 )
39981 : {
39982 0 : eval = mlperrorsubset(&psession->network, &s->densexy, s->npoints, valsubset, valsubsetsize, _state);
39983 : }
39984 0 : if( s->datatype==1 )
39985 : {
39986 0 : eval = mlperrorsparsesubset(&psession->network, &s->sparsexy, s->npoints, valsubset, valsubsetsize, _state);
39987 : }
39988 0 : if( ae_fp_less_eq(eval,ebest)||valsubsetsize==0 )
39989 : {
39990 0 : ae_v_move(&psession->wbuf0.ptr.p_double[0], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
39991 0 : ebest = eval;
39992 0 : itbest = itcnt;
39993 : }
39994 0 : if( itcnt>30&&ae_fp_greater((double)(itcnt),1.5*itbest) )
39995 : {
39996 0 : break;
39997 : }
39998 0 : itcnt = itcnt+1;
39999 : }
40000 0 : ae_v_move(&psession->network.weights.ptr.p_double[0], 1, &psession->wbuf0.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
40001 0 : rep->ngrad = ngradbatch;
40002 : }
40003 : else
40004 : {
40005 0 : for(i=0; i<=wcount-1; i++)
40006 : {
40007 0 : psession->network.weights.ptr.p_double[i] = (double)(0);
40008 : }
40009 : }
40010 :
40011 : /*
40012 : * Evaluate network performance and update PSession.BestParameters/BestRMSError
40013 : * (if needed).
40014 : */
40015 0 : if( s->datatype==0 )
40016 : {
40017 0 : mlpallerrorssubset(&psession->network, &s->densexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
40018 : }
40019 0 : if( s->datatype==1 )
40020 : {
40021 0 : mlpallerrorssparsesubset(&psession->network, &s->sparsexy, s->npoints, trnsubset, trnsubsetsize, &modrep, _state);
40022 : }
40023 0 : if( ae_fp_less(modrep.rmserror,psession->bestrmserror) )
40024 : {
40025 0 : mlpexporttunableparameters(&psession->network, &psession->bestparameters, &pcount, _state);
40026 0 : psession->bestrmserror = modrep.rmserror;
40027 : }
40028 :
40029 : /*
40030 : * Move session back to pool
40031 : */
40032 0 : ae_shared_pool_recycle(sessions, &_psession, _state);
40033 0 : ae_frame_leave(_state);
40034 : }
40035 :
40036 :
40037 : /*************************************************************************
40038 : Serial stub for GPL edition.
40039 : *************************************************************************/
40040 0 : ae_bool _trypexec_mlptrain_mlptrainnetworkx(mlptrainer* s,
40041 : ae_int_t nrestarts,
40042 : ae_int_t algokind,
40043 : /* Integer */ ae_vector* trnsubset,
40044 : ae_int_t trnsubsetsize,
40045 : /* Integer */ ae_vector* valsubset,
40046 : ae_int_t valsubsetsize,
40047 : multilayerperceptron* network,
40048 : mlpreport* rep,
40049 : ae_bool isrootcall,
40050 : ae_shared_pool* sessions,
40051 : ae_state *_state)
40052 : {
40053 0 : return ae_false;
40054 : }
40055 :
40056 :
40057 : /*************************************************************************
40058 : This function trains the neural network ensemble passed to it using the
40059 : current dataset and the early stopping training algorithm. Each early
40060 : stopping round performs NRestarts random restarts (thus, EnsembleSize*
40061 : NRestarts training rounds are performed in total).
40062 :
40063 :
40064 : -- ALGLIB --
40065 : Copyright 22.08.2012 by Bochkanov Sergey
40066 : *************************************************************************/
40067 0 : static void mlptrain_mlptrainensemblex(mlptrainer* s,
40068 : mlpensemble* ensemble,
40069 : ae_int_t idx0,
40070 : ae_int_t idx1,
40071 : ae_int_t nrestarts,
40072 : ae_int_t trainingmethod,
40073 : sinteger* ngrad,
40074 : ae_bool isrootcall,
40075 : ae_shared_pool* esessions,
40076 : ae_state *_state)
40077 : {
40078 : ae_frame _frame_block;
40079 : ae_int_t pcount;
40080 : ae_int_t nin;
40081 : ae_int_t nout;
40082 : ae_int_t wcount;
40083 : ae_int_t i;
40084 : ae_int_t j;
40085 : ae_int_t k;
40086 : ae_int_t trnsubsetsize;
40087 : ae_int_t valsubsetsize;
40088 : ae_int_t k0;
40089 : sinteger ngrad0;
40090 : sinteger ngrad1;
40091 : mlpetrnsession *psession;
40092 : ae_smart_ptr _psession;
40093 : hqrndstate rs;
40094 :
40095 0 : ae_frame_make(_state, &_frame_block);
40096 0 : memset(&ngrad0, 0, sizeof(ngrad0));
40097 0 : memset(&ngrad1, 0, sizeof(ngrad1));
40098 0 : memset(&_psession, 0, sizeof(_psession));
40099 0 : memset(&rs, 0, sizeof(rs));
40100 0 : _sinteger_init(&ngrad0, _state, ae_true);
40101 0 : _sinteger_init(&ngrad1, _state, ae_true);
40102 0 : ae_smart_ptr_init(&_psession, (void**)&psession, _state, ae_true);
40103 0 : _hqrndstate_init(&rs, _state, ae_true);
40104 :
40105 0 : nin = mlpgetinputscount(&ensemble->network, _state);
40106 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
40107 0 : wcount = mlpgetweightscount(&ensemble->network, _state);
40108 0 : if( mlpissoftmax(&ensemble->network, _state) )
40109 : {
40110 0 : pcount = nin;
40111 : }
40112 : else
40113 : {
40114 0 : pcount = nin+nout;
40115 : }
40116 0 : if( nrestarts<=0 )
40117 : {
40118 0 : nrestarts = 1;
40119 : }
40120 :
40121 : /*
40122 : * Handle degenerate case
40123 : */
40124 0 : if( s->npoints<2 )
40125 : {
40126 0 : for(i=idx0; i<=idx1-1; i++)
40127 : {
40128 0 : for(j=0; j<=wcount-1; j++)
40129 : {
40130 0 : ensemble->weights.ptr.p_double[i*wcount+j] = 0.0;
40131 : }
40132 0 : for(j=0; j<=pcount-1; j++)
40133 : {
40134 0 : ensemble->columnmeans.ptr.p_double[i*pcount+j] = 0.0;
40135 0 : ensemble->columnsigmas.ptr.p_double[i*pcount+j] = 1.0;
40136 : }
40137 : }
40138 0 : ae_frame_leave(_state);
40139 0 : return;
40140 : }
40141 :
40142 : /*
40143 : * Process root call
40144 : */
40145 0 : if( isrootcall )
40146 : {
40147 :
40148 : /*
40149 : * Try parallelization
40150 : * We expect that the minimum number of iterations before convergence is 100.
40151 : * Hence our approach to evaluating task complexity.
40152 : */
40153 0 : if( ae_fp_greater_eq(ae_maxint(nrestarts, 1, _state)*(idx1-idx0)*rmul3((double)(2*wcount), (double)(s->npoints), (double)(100), _state),smpactivationlevel(_state)) )
40154 : {
40155 0 : if( _trypexec_mlptrain_mlptrainensemblex(s,ensemble,idx0,idx1,nrestarts,trainingmethod,ngrad,isrootcall,esessions, _state) )
40156 : {
40157 0 : ae_frame_leave(_state);
40158 0 : return;
40159 : }
40160 : }
40161 :
40162 : /*
40163 : * Prepare:
40164 : * * prepare MLPETrnSessions
40165 : * * fill ensemble with zeros (helps to detect errors)
40166 : */
40167 0 : mlptrain_initmlpetrnsessions(&ensemble->network, s, esessions, _state);
40168 0 : for(i=idx0; i<=idx1-1; i++)
40169 : {
40170 0 : for(j=0; j<=wcount-1; j++)
40171 : {
40172 0 : ensemble->weights.ptr.p_double[i*wcount+j] = 0.0;
40173 : }
40174 0 : for(j=0; j<=pcount-1; j++)
40175 : {
40176 0 : ensemble->columnmeans.ptr.p_double[i*pcount+j] = 0.0;
40177 0 : ensemble->columnsigmas.ptr.p_double[i*pcount+j] = 0.0;
40178 : }
40179 : }
40180 :
40181 : /*
40182 : * Train in non-root mode and exit
40183 : */
40184 0 : mlptrain_mlptrainensemblex(s, ensemble, idx0, idx1, nrestarts, trainingmethod, ngrad, ae_false, esessions, _state);
40185 0 : ae_frame_leave(_state);
40186 0 : return;
40187 : }
40188 :
40189 : /*
40190 : * Split problem
40191 : */
40192 0 : if( idx1-idx0>=2 )
40193 : {
40194 0 : k0 = (idx1-idx0)/2;
40195 0 : ngrad0.val = 0;
40196 0 : ngrad1.val = 0;
40197 0 : mlptrain_mlptrainensemblex(s, ensemble, idx0, idx0+k0, nrestarts, trainingmethod, &ngrad0, ae_false, esessions, _state);
40198 0 : mlptrain_mlptrainensemblex(s, ensemble, idx0+k0, idx1, nrestarts, trainingmethod, &ngrad1, ae_false, esessions, _state);
40199 0 : ngrad->val = ngrad0.val+ngrad1.val;
40200 0 : ae_frame_leave(_state);
40201 0 : return;
40202 : }
40203 :
40204 : /*
40205 : * Retrieve and prepare session
40206 : */
40207 0 : ae_shared_pool_retrieve(esessions, &_psession, _state);
40208 :
40209 : /*
40210 : * Train
40211 : */
40212 0 : hqrndrandomize(&rs, _state);
40213 0 : for(k=idx0; k<=idx1-1; k++)
40214 : {
40215 :
40216 : /*
40217 : * Split set (method 0: random 2:1 train/validation split retried until both parts are non-empty; method 1: bootstrap sample)
40218 : */
40219 0 : trnsubsetsize = 0;
40220 0 : valsubsetsize = 0;
40221 0 : if( trainingmethod==0 )
40222 : {
40223 0 : do
40224 : {
40225 0 : trnsubsetsize = 0;
40226 0 : valsubsetsize = 0;
40227 0 : for(i=0; i<=s->npoints-1; i++)
40228 : {
40229 0 : if( ae_fp_less(ae_randomreal(_state),0.66) )
40230 : {
40231 :
40232 : /*
40233 : * Assign sample to training set
40234 : */
40235 0 : psession->trnsubset.ptr.p_int[trnsubsetsize] = i;
40236 0 : trnsubsetsize = trnsubsetsize+1;
40237 : }
40238 : else
40239 : {
40240 :
40241 : /*
40242 : * Assign sample to validation set
40243 : */
40244 0 : psession->valsubset.ptr.p_int[valsubsetsize] = i;
40245 0 : valsubsetsize = valsubsetsize+1;
40246 : }
40247 : }
40248 : }
40249 0 : while(!(trnsubsetsize!=0&&valsubsetsize!=0));
40250 : }
40251 0 : if( trainingmethod==1 )
40252 : {
40253 0 : valsubsetsize = 0;
40254 0 : trnsubsetsize = s->npoints;
40255 0 : for(i=0; i<=s->npoints-1; i++)
40256 : {
40257 0 : psession->trnsubset.ptr.p_int[i] = hqrnduniformi(&rs, s->npoints, _state);
40258 : }
40259 : }
40260 :
40261 : /*
40262 : * Train
40263 : */
40264 0 : mlptrain_mlptrainnetworkx(s, nrestarts, -1, &psession->trnsubset, trnsubsetsize, &psession->valsubset, valsubsetsize, &psession->network, &psession->mlprep, ae_true, &psession->mlpsessions, _state);
40265 0 : ngrad->val = ngrad->val+psession->mlprep.ngrad;
40266 :
40267 : /*
40268 : * Save results
40269 : */
40270 0 : ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &psession->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
40271 0 : ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcount], 1, &psession->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
40272 0 : ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcount], 1, &psession->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcount,(k+1)*pcount-1));
40273 : }
40274 :
40275 : /*
40276 : * Recycle session
40277 : */
40278 0 : ae_shared_pool_recycle(esessions, &_psession, _state);
40279 0 : ae_frame_leave(_state);
40280 : }
40281 :
40282 :
40283 : /*************************************************************************
40284 : Serial stub for GPL edition.
40285 : *************************************************************************/
40286 0 : ae_bool _trypexec_mlptrain_mlptrainensemblex(mlptrainer* s,
40287 : mlpensemble* ensemble,
40288 : ae_int_t idx0,
40289 : ae_int_t idx1,
40290 : ae_int_t nrestarts,
40291 : ae_int_t trainingmethod,
40292 : sinteger* ngrad,
40293 : ae_bool isrootcall,
40294 : ae_shared_pool* esessions,
40295 : ae_state *_state)
40296 : {
40297 0 : return ae_false;
40298 : }
40299 :
40300 :
40301 : /*************************************************************************
40302 : This function performs step-by-step training of the neural network. Here
40303 : "step-by-step" means that training starts with MLPStartTrainingX call,
40304 : and then user subsequently calls MLPContinueTrainingX to perform one more
40305 : iteration of the training.
40306 :
40307 : After call to this function trainer object remembers network and is ready
40308 : to train it. However, no training is performed until first call to
40309 : MLPContinueTraining() function. Subsequent calls to MLPContinueTraining()
40310 : will advance traing progress one iteration further.
40311 :
40312 :
40313 : -- ALGLIB --
40314 : Copyright 13.08.2012 by Bochkanov Sergey
40315 : *************************************************************************/
40316 0 : static void mlptrain_mlpstarttrainingx(mlptrainer* s,
40317 : ae_bool randomstart,
40318 : ae_int_t algokind,
40319 : /* Integer */ ae_vector* subset,
40320 : ae_int_t subsetsize,
40321 : smlptrnsession* session,
40322 : ae_state *_state)
40323 : {
40324 : ae_int_t nin;
40325 : ae_int_t nout;
40326 : ae_int_t wcount;
40327 : ae_int_t ntype;
40328 : ae_int_t ttype;
40329 : ae_int_t i;
40330 :
40331 :
40332 :
40333 : /*
40334 : * Check parameters
40335 : */
40336 0 : ae_assert(s->npoints>=0, "MLPStartTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)", _state);
40337 0 : ae_assert(algokind==0||algokind==-1, "MLPStartTrainingX: unexpected AlgoKind", _state);
40338 0 : if( s->rcpar )
40339 : {
40340 0 : ttype = 0;
40341 : }
40342 : else
40343 : {
40344 0 : ttype = 1;
40345 : }
40346 0 : if( !mlpissoftmax(&session->network, _state) )
40347 : {
40348 0 : ntype = 0;
40349 : }
40350 : else
40351 : {
40352 0 : ntype = 1;
40353 : }
40354 0 : ae_assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the resulting network is not similar to network type in trainer object", _state);
40355 0 : mlpproperties(&session->network, &nin, &nout, &wcount, _state);
40356 0 : ae_assert(s->nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the network.", _state);
40357 0 : ae_assert(s->nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the network.", _state);
40358 0 : ae_assert(subset->cnt>=subsetsize, "MLPStartTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)", _state);
40359 0 : for(i=0; i<=subsetsize-1; i++)
40360 : {
40361 0 : ae_assert(subset->ptr.p_int[i]>=0&&subset->ptr.p_int[i]<=s->npoints-1, "MLPStartTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)", _state);
40362 : }
40363 :
40364 : /*
40365 : * Prepare session
40366 : */
40367 0 : minlbfgssetcond(&session->optimizer, 0.0, 0.0, s->wstep, s->maxits, _state);
40368 0 : if( s->npoints>0&&subsetsize!=0 )
40369 : {
40370 0 : if( randomstart )
40371 : {
40372 0 : mlprandomize(&session->network, _state);
40373 : }
40374 0 : minlbfgsrestartfrom(&session->optimizer, &session->network.weights, _state);
40375 : }
40376 : else
40377 : {
40378 0 : for(i=0; i<=wcount-1; i++)
40379 : {
40380 0 : session->network.weights.ptr.p_double[i] = (double)(0);
40381 : }
40382 : }
40383 0 : if( algokind==-1 )
40384 : {
40385 0 : session->algoused = s->algokind;
40386 0 : if( s->algokind==1 )
40387 : {
40388 0 : session->minibatchsize = s->minibatchsize;
40389 : }
40390 : }
40391 : else
40392 : {
40393 0 : session->algoused = 0;
40394 : }
40395 0 : hqrndrandomize(&session->generator, _state);
40396 0 : ae_vector_set_length(&session->rstate.ia, 15+1, _state);
40397 0 : ae_vector_set_length(&session->rstate.ra, 1+1, _state);
40398 0 : session->rstate.stage = -1;
40399 0 : }
40400 :
40401 :
40402 : /*************************************************************************
40403 : This function performs step-by-step training of the neural network. Here
40404 : "step-by-step" means that training starts with MLPStartTrainingX call,
40405 : and then user subsequently calls MLPContinueTrainingX to perform one more
40406 : iteration of the training.
40407 :
40408 : This function performs one more iteration of the training and returns
40409 : either True (training continues) or False (training stopped). If True
40410 : is returned, the network weights are updated according to the current
40411 : state of the optimization progress. If False is returned, no additional
40412 : update is performed (the previous update of the network weights moved us
40413 : to the final point, and no additional updates are needed).
40414 :
40415 : EXAMPLE:
40416 : >
40417 : > [initialize network and trainer object]
40418 : >
40419 : > MLPStartTraining(Trainer, Network, True)
40420 : > while MLPContinueTraining(Trainer, Network) do
40421 : > [visualize training progress]
40422 : >
40423 :
40424 :
40425 : -- ALGLIB --
40426 : Copyright 13.08.2012 by Bochkanov Sergey
40427 : *************************************************************************/
40428 0 : static ae_bool mlptrain_mlpcontinuetrainingx(mlptrainer* s,
40429 : /* Integer */ ae_vector* subset,
40430 : ae_int_t subsetsize,
40431 : ae_int_t* ngradbatch,
40432 : smlptrnsession* session,
40433 : ae_state *_state)
40434 : {
40435 : ae_int_t nin;
40436 : ae_int_t nout;
40437 : ae_int_t wcount;
40438 : ae_int_t twcount;
40439 : ae_int_t ntype;
40440 : ae_int_t ttype;
40441 : double decay;
40442 : double v;
40443 : ae_int_t i;
40444 : ae_int_t j;
40445 : ae_int_t k;
40446 : ae_int_t trnsetsize;
40447 : ae_int_t epoch;
40448 : ae_int_t minibatchcount;
40449 : ae_int_t minibatchidx;
40450 : ae_int_t cursize;
40451 : ae_int_t idx0;
40452 : ae_int_t idx1;
40453 : ae_bool result;
40454 :
40455 :
40456 :
40457 : /*
40458 : * Reverse communication preparations
40459 : * I know it looks ugly, but it works the same way
40460 : * everywhere, from C++ to Python.
40461 : *
40462 : * This code initializes locals by:
40463 : * * random values determined during code
40464 : * generation - on first subroutine call
40465 : * * values from previous call - on subsequent calls
40466 : */
40467 0 : if( session->rstate.stage>=0 )
40468 : {
40469 0 : nin = session->rstate.ia.ptr.p_int[0];
40470 0 : nout = session->rstate.ia.ptr.p_int[1];
40471 0 : wcount = session->rstate.ia.ptr.p_int[2];
40472 0 : twcount = session->rstate.ia.ptr.p_int[3];
40473 0 : ntype = session->rstate.ia.ptr.p_int[4];
40474 0 : ttype = session->rstate.ia.ptr.p_int[5];
40475 0 : i = session->rstate.ia.ptr.p_int[6];
40476 0 : j = session->rstate.ia.ptr.p_int[7];
40477 0 : k = session->rstate.ia.ptr.p_int[8];
40478 0 : trnsetsize = session->rstate.ia.ptr.p_int[9];
40479 0 : epoch = session->rstate.ia.ptr.p_int[10];
40480 0 : minibatchcount = session->rstate.ia.ptr.p_int[11];
40481 0 : minibatchidx = session->rstate.ia.ptr.p_int[12];
40482 0 : cursize = session->rstate.ia.ptr.p_int[13];
40483 0 : idx0 = session->rstate.ia.ptr.p_int[14];
40484 0 : idx1 = session->rstate.ia.ptr.p_int[15];
40485 0 : decay = session->rstate.ra.ptr.p_double[0];
40486 0 : v = session->rstate.ra.ptr.p_double[1];
40487 : }
40488 : else
40489 : {
40490 0 : nin = 359;
40491 0 : nout = -58;
40492 0 : wcount = -919;
40493 0 : twcount = -909;
40494 0 : ntype = 81;
40495 0 : ttype = 255;
40496 0 : i = 74;
40497 0 : j = -788;
40498 0 : k = 809;
40499 0 : trnsetsize = 205;
40500 0 : epoch = -838;
40501 0 : minibatchcount = 939;
40502 0 : minibatchidx = -526;
40503 0 : cursize = 763;
40504 0 : idx0 = -541;
40505 0 : idx1 = -698;
40506 0 : decay = -900;
40507 0 : v = -318;
40508 : }
40509 0 : if( session->rstate.stage==0 )
40510 : {
40511 0 : goto lbl_0;
40512 : }
40513 :
40514 : /*
40515 : * Routine body
40516 : */
40517 :
40518 : /*
40519 : * Check correctness of inputs
40520 : */
40521 0 : ae_assert(s->npoints>=0, "MLPContinueTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0).", _state);
40522 0 : if( s->rcpar )
40523 : {
40524 0 : ttype = 0;
40525 : }
40526 : else
40527 : {
40528 0 : ttype = 1;
40529 : }
40530 0 : if( !mlpissoftmax(&session->network, _state) )
40531 : {
40532 0 : ntype = 0;
40533 : }
40534 : else
40535 : {
40536 0 : ntype = 1;
40537 : }
40538 0 : ae_assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the resulting network is not similar to network type in trainer object.", _state);
40539 0 : mlpproperties(&session->network, &nin, &nout, &wcount, _state);
40540 0 : ae_assert(s->nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the network.", _state);
40541 0 : ae_assert(s->nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the network.", _state);
40542 0 : ae_assert(subset->cnt>=subsetsize, "MLPContinueTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize).", _state);
40543 0 : for(i=0; i<=subsetsize-1; i++)
40544 : {
40545 0 : ae_assert(subset->ptr.p_int[i]>=0&&subset->ptr.p_int[i]<=s->npoints-1, "MLPContinueTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1).", _state);
40546 : }
40547 :
40548 : /*
40549 : * Quick exit on empty training set
40550 : */
40551 0 : if( s->npoints==0||subsetsize==0 )
40552 : {
40553 0 : result = ae_false;
40554 0 : return result;
40555 : }
40556 :
40557 : /*
40558 : * Minibatch training
40559 : */
40560 0 : if( session->algoused==1 )
40561 : {
40562 0 : ae_assert(ae_false, "MINIBATCH TRAINING IS NOT IMPLEMENTED YET", _state);
40563 : }
40564 :
40565 : /*
40566 : * Last option: full batch training
40567 : */
40568 0 : decay = s->decay;
40569 0 : lbl_1:
40570 0 : if( !minlbfgsiteration(&session->optimizer, _state) )
40571 : {
40572 0 : goto lbl_2;
40573 : }
40574 0 : if( !session->optimizer.xupdated )
40575 : {
40576 0 : goto lbl_3;
40577 : }
40578 0 : ae_v_move(&session->network.weights.ptr.p_double[0], 1, &session->optimizer.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
40579 0 : session->rstate.stage = 0;
40580 0 : goto lbl_rcomm;
40581 0 : lbl_0:
40582 0 : lbl_3:
40583 0 : ae_v_move(&session->network.weights.ptr.p_double[0], 1, &session->optimizer.x.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
40584 0 : if( s->datatype==0 )
40585 : {
40586 0 : mlpgradbatchsubset(&session->network, &s->densexy, s->npoints, subset, subsetsize, &session->optimizer.f, &session->optimizer.g, _state);
40587 : }
40588 0 : if( s->datatype==1 )
40589 : {
40590 0 : mlpgradbatchsparsesubset(&session->network, &s->sparsexy, s->npoints, subset, subsetsize, &session->optimizer.f, &session->optimizer.g, _state);
40591 : }
40592 :
40593 : /*
40594 : * Increment number of operations performed on batch gradient
40595 : */
40596 0 : *ngradbatch = *ngradbatch+1;
40597 0 : v = ae_v_dotproduct(&session->network.weights.ptr.p_double[0], 1, &session->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1));
40598 0 : session->optimizer.f = session->optimizer.f+0.5*decay*v;
40599 0 : ae_v_addd(&session->optimizer.g.ptr.p_double[0], 1, &session->network.weights.ptr.p_double[0], 1, ae_v_len(0,wcount-1), decay);
40600 0 : goto lbl_1;
40601 0 : lbl_2:
40602 0 : minlbfgsresultsbuf(&session->optimizer, &session->network.weights, &session->optimizerrep, _state);
40603 0 : result = ae_false;
40604 0 : return result;
40605 :
40606 : /*
40607 : * Saving state
40608 : */
40609 0 : lbl_rcomm:
40610 0 : result = ae_true;
40611 0 : session->rstate.ia.ptr.p_int[0] = nin;
40612 0 : session->rstate.ia.ptr.p_int[1] = nout;
40613 0 : session->rstate.ia.ptr.p_int[2] = wcount;
40614 0 : session->rstate.ia.ptr.p_int[3] = twcount;
40615 0 : session->rstate.ia.ptr.p_int[4] = ntype;
40616 0 : session->rstate.ia.ptr.p_int[5] = ttype;
40617 0 : session->rstate.ia.ptr.p_int[6] = i;
40618 0 : session->rstate.ia.ptr.p_int[7] = j;
40619 0 : session->rstate.ia.ptr.p_int[8] = k;
40620 0 : session->rstate.ia.ptr.p_int[9] = trnsetsize;
40621 0 : session->rstate.ia.ptr.p_int[10] = epoch;
40622 0 : session->rstate.ia.ptr.p_int[11] = minibatchcount;
40623 0 : session->rstate.ia.ptr.p_int[12] = minibatchidx;
40624 0 : session->rstate.ia.ptr.p_int[13] = cursize;
40625 0 : session->rstate.ia.ptr.p_int[14] = idx0;
40626 0 : session->rstate.ia.ptr.p_int[15] = idx1;
40627 0 : session->rstate.ra.ptr.p_double[0] = decay;
40628 0 : session->rstate.ra.ptr.p_double[1] = v;
40629 0 : return result;
40630 : }
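
/*************************************************************************
USAGE SKETCH (illustrative, not part of the library). The loop below
drives the step-by-step protocol implemented above via the public ALGLIB
C++ interface; the dataset and network geometry are assumptions made for
the example only:

    alglib::mlptrainer trn;
    alglib::multilayerperceptron net;
    alglib::real_2d_array xy = "[[0.0,0.1],[0.5,0.7],[1.0,0.9]]";
    alglib::mlpcreatetrainer(1, 1, trn);        // 1 input, 1 output
    alglib::mlpsetdataset(trn, xy, 3);          // 3 points
    alglib::mlpcreate1(1, 5, 1, net);           // one hidden layer (5 neurons)
    alglib::mlpstarttraining(trn, net, true);   // start from random point
    while( alglib::mlpcontinuetraining(trn, net) )
    {
        // network weights were advanced by one optimizer iteration;
        // training progress may be visualized here
    }

Each MLPContinueTraining() call resumes the reverse-communication state
machine above at the saved RState.Stage and runs the underlying L-BFGS
optimizer until its next XUpdated report.
*************************************************************************/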
40631 :
40632 :
40633 : /*************************************************************************
40634 : Internal bagging subroutine: trains each ensemble member on a bootstrap replicate of the dataset and accumulates out-of-bag (OOB) error estimates.
40635 :
40636 : -- ALGLIB --
40637 : Copyright 19.02.2009 by Bochkanov Sergey
40638 : *************************************************************************/
40639 0 : static void mlptrain_mlpebagginginternal(mlpensemble* ensemble,
40640 : /* Real */ ae_matrix* xy,
40641 : ae_int_t npoints,
40642 : double decay,
40643 : ae_int_t restarts,
40644 : double wstep,
40645 : ae_int_t maxits,
40646 : ae_bool lmalgorithm,
40647 : ae_int_t* info,
40648 : mlpreport* rep,
40649 : mlpcvreport* ooberrors,
40650 : ae_state *_state)
40651 : {
40652 : ae_frame _frame_block;
40653 : ae_matrix xys;
40654 : ae_vector s;
40655 : ae_matrix oobbuf;
40656 : ae_vector oobcntbuf;
40657 : ae_vector x;
40658 : ae_vector y;
40659 : ae_vector dy;
40660 : ae_vector dsbuf;
40661 : ae_int_t ccnt;
40662 : ae_int_t pcnt;
40663 : ae_int_t i;
40664 : ae_int_t j;
40665 : ae_int_t k;
40666 : double v;
40667 : mlpreport tmprep;
40668 : ae_int_t nin;
40669 : ae_int_t nout;
40670 : ae_int_t wcount;
40671 : hqrndstate rs;
40672 :
40673 0 : ae_frame_make(_state, &_frame_block);
40674 0 : memset(&xys, 0, sizeof(xys));
40675 0 : memset(&s, 0, sizeof(s));
40676 0 : memset(&oobbuf, 0, sizeof(oobbuf));
40677 0 : memset(&oobcntbuf, 0, sizeof(oobcntbuf));
40678 0 : memset(&x, 0, sizeof(x));
40679 0 : memset(&y, 0, sizeof(y));
40680 0 : memset(&dy, 0, sizeof(dy));
40681 0 : memset(&dsbuf, 0, sizeof(dsbuf));
40682 0 : memset(&tmprep, 0, sizeof(tmprep));
40683 0 : memset(&rs, 0, sizeof(rs));
40684 0 : *info = 0;
40685 0 : _mlpreport_clear(rep);
40686 0 : _mlpcvreport_clear(ooberrors);
40687 0 : ae_matrix_init(&xys, 0, 0, DT_REAL, _state, ae_true);
40688 0 : ae_vector_init(&s, 0, DT_BOOL, _state, ae_true);
40689 0 : ae_matrix_init(&oobbuf, 0, 0, DT_REAL, _state, ae_true);
40690 0 : ae_vector_init(&oobcntbuf, 0, DT_INT, _state, ae_true);
40691 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
40692 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
40693 0 : ae_vector_init(&dy, 0, DT_REAL, _state, ae_true);
40694 0 : ae_vector_init(&dsbuf, 0, DT_REAL, _state, ae_true);
40695 0 : _mlpreport_init(&tmprep, _state, ae_true);
40696 0 : _hqrndstate_init(&rs, _state, ae_true);
40697 :
40698 0 : nin = mlpgetinputscount(&ensemble->network, _state);
40699 0 : nout = mlpgetoutputscount(&ensemble->network, _state);
40700 0 : wcount = mlpgetweightscount(&ensemble->network, _state);
40701 :
40702 : /*
40703 : * Test for inputs
40704 : */
40705 0 : if( (!lmalgorithm&&ae_fp_eq(wstep,(double)(0)))&&maxits==0 )
40706 : {
40707 0 : *info = -8;
40708 0 : ae_frame_leave(_state);
40709 0 : return;
40710 : }
40711 0 : if( ((npoints<=0||restarts<1)||ae_fp_less(wstep,(double)(0)))||maxits<0 )
40712 : {
40713 0 : *info = -1;
40714 0 : ae_frame_leave(_state);
40715 0 : return;
40716 : }
40717 0 : if( mlpissoftmax(&ensemble->network, _state) )
40718 : {
40719 0 : for(i=0; i<=npoints-1; i++)
40720 : {
40721 0 : if( ae_round(xy->ptr.pp_double[i][nin], _state)<0||ae_round(xy->ptr.pp_double[i][nin], _state)>=nout )
40722 : {
40723 0 : *info = -2;
40724 0 : ae_frame_leave(_state);
40725 0 : return;
40726 : }
40727 : }
40728 : }
40729 :
40730 : /*
40731 : * allocate temporaries
40732 : */
40733 0 : *info = 2;
40734 0 : rep->ngrad = 0;
40735 0 : rep->nhess = 0;
40736 0 : rep->ncholesky = 0;
40737 0 : ooberrors->relclserror = (double)(0);
40738 0 : ooberrors->avgce = (double)(0);
40739 0 : ooberrors->rmserror = (double)(0);
40740 0 : ooberrors->avgerror = (double)(0);
40741 0 : ooberrors->avgrelerror = (double)(0);
40742 0 : if( mlpissoftmax(&ensemble->network, _state) )
40743 : {
40744 0 : ccnt = nin+1;
40745 0 : pcnt = nin;
40746 : }
40747 : else
40748 : {
40749 0 : ccnt = nin+nout;
40750 0 : pcnt = nin+nout;
40751 : }
40752 0 : ae_matrix_set_length(&xys, npoints, ccnt, _state);
40753 0 : ae_vector_set_length(&s, npoints, _state);
40754 0 : ae_matrix_set_length(&oobbuf, npoints, nout, _state);
40755 0 : ae_vector_set_length(&oobcntbuf, npoints, _state);
40756 0 : ae_vector_set_length(&x, nin, _state);
40757 0 : ae_vector_set_length(&y, nout, _state);
40758 0 : if( mlpissoftmax(&ensemble->network, _state) )
40759 : {
40760 0 : ae_vector_set_length(&dy, 1, _state);
40761 : }
40762 : else
40763 : {
40764 0 : ae_vector_set_length(&dy, nout, _state);
40765 : }
40766 0 : for(i=0; i<=npoints-1; i++)
40767 : {
40768 0 : for(j=0; j<=nout-1; j++)
40769 : {
40770 0 : oobbuf.ptr.pp_double[i][j] = (double)(0);
40771 : }
40772 : }
40773 0 : for(i=0; i<=npoints-1; i++)
40774 : {
40775 0 : oobcntbuf.ptr.p_int[i] = 0;
40776 : }
40777 :
40778 : /*
40779 : * main bagging cycle
40780 : */
40781 0 : hqrndrandomize(&rs, _state);
40782 0 : for(k=0; k<=ensemble->ensemblesize-1; k++)
40783 : {
40784 :
40785 : /*
40786 : * prepare dataset
40787 : */
40788 0 : for(i=0; i<=npoints-1; i++)
40789 : {
40790 0 : s.ptr.p_bool[i] = ae_false;
40791 : }
40792 0 : for(i=0; i<=npoints-1; i++)
40793 : {
40794 0 : j = hqrnduniformi(&rs, npoints, _state);
40795 0 : s.ptr.p_bool[j] = ae_true;
40796 0 : ae_v_move(&xys.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,ccnt-1));
40797 : }
40798 :
40799 : /*
40800 : * train
40801 : */
40802 0 : if( lmalgorithm )
40803 : {
40804 0 : mlptrainlm(&ensemble->network, &xys, npoints, decay, restarts, info, &tmprep, _state);
40805 : }
40806 : else
40807 : {
40808 0 : mlptrainlbfgs(&ensemble->network, &xys, npoints, decay, restarts, wstep, maxits, info, &tmprep, _state);
40809 : }
40810 0 : if( *info<0 )
40811 : {
40812 0 : ae_frame_leave(_state);
40813 0 : return;
40814 : }
40815 :
40816 : /*
40817 : * save results
40818 : */
40819 0 : rep->ngrad = rep->ngrad+tmprep.ngrad;
40820 0 : rep->nhess = rep->nhess+tmprep.nhess;
40821 0 : rep->ncholesky = rep->ncholesky+tmprep.ncholesky;
40822 0 : ae_v_move(&ensemble->weights.ptr.p_double[k*wcount], 1, &ensemble->network.weights.ptr.p_double[0], 1, ae_v_len(k*wcount,(k+1)*wcount-1));
40823 0 : ae_v_move(&ensemble->columnmeans.ptr.p_double[k*pcnt], 1, &ensemble->network.columnmeans.ptr.p_double[0], 1, ae_v_len(k*pcnt,(k+1)*pcnt-1));
40824 0 : ae_v_move(&ensemble->columnsigmas.ptr.p_double[k*pcnt], 1, &ensemble->network.columnsigmas.ptr.p_double[0], 1, ae_v_len(k*pcnt,(k+1)*pcnt-1));
40825 :
40826 : /*
40827 : * OOB estimates
40828 : */
40829 0 : for(i=0; i<=npoints-1; i++)
40830 : {
40831 0 : if( !s.ptr.p_bool[i] )
40832 : {
40833 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nin-1));
40834 0 : mlpprocess(&ensemble->network, &x, &y, _state);
40835 0 : ae_v_add(&oobbuf.ptr.pp_double[i][0], 1, &y.ptr.p_double[0], 1, ae_v_len(0,nout-1));
40836 0 : oobcntbuf.ptr.p_int[i] = oobcntbuf.ptr.p_int[i]+1;
40837 : }
40838 : }
40839 : }
40840 :
40841 : /*
40842 : * OOB estimates
40843 : */
40844 0 : if( mlpissoftmax(&ensemble->network, _state) )
40845 : {
40846 0 : dserrallocate(nout, &dsbuf, _state);
40847 : }
40848 : else
40849 : {
40850 0 : dserrallocate(-nout, &dsbuf, _state);
40851 : }
40852 0 : for(i=0; i<=npoints-1; i++)
40853 : {
40854 0 : if( oobcntbuf.ptr.p_int[i]!=0 )
40855 : {
40856 0 : v = (double)1/(double)oobcntbuf.ptr.p_int[i];
40857 0 : ae_v_moved(&y.ptr.p_double[0], 1, &oobbuf.ptr.pp_double[i][0], 1, ae_v_len(0,nout-1), v);
40858 0 : if( mlpissoftmax(&ensemble->network, _state) )
40859 : {
40860 0 : dy.ptr.p_double[0] = xy->ptr.pp_double[i][nin];
40861 : }
40862 : else
40863 : {
40864 0 : ae_v_moved(&dy.ptr.p_double[0], 1, &xy->ptr.pp_double[i][nin], 1, ae_v_len(0,nout-1), v);
40865 : }
40866 0 : dserraccumulate(&dsbuf, &y, &dy, _state);
40867 : }
40868 : }
40869 0 : dserrfinish(&dsbuf, _state);
40870 0 : ooberrors->relclserror = dsbuf.ptr.p_double[0];
40871 0 : ooberrors->avgce = dsbuf.ptr.p_double[1];
40872 0 : ooberrors->rmserror = dsbuf.ptr.p_double[2];
40873 0 : ooberrors->avgerror = dsbuf.ptr.p_double[3];
40874 0 : ooberrors->avgrelerror = dsbuf.ptr.p_double[4];
40875 0 : ae_frame_leave(_state);
40876 : }
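
/*************************************************************************
USAGE SKETCH (illustrative, not part of the library). The routine above
trains each ensemble member on a bootstrap replicate (rows sampled with
replacement) and averages each network's predictions over the points it
did NOT see, producing out-of-bag (OOB) error estimates. A hedged example
of the corresponding public LBFGS-based entry point (mlpebagginglbfgs is
assumed to be the wrapper with LMAlgorithm=False); data and parameters
are illustrative:

    alglib::mlpensemble ens;
    alglib::real_2d_array xy = "[[0.0,0.1],[0.5,0.7],[1.0,0.9]]";
    alglib::ae_int_t info;
    alglib::mlpreport rep;
    alglib::mlpcvreport oob;
    alglib::mlpecreate1(1, 5, 1, 10, ens);  // ensemble of 10 networks
    alglib::mlpebagginglbfgs(ens, xy, 3, 0.001, 2, 0.01, 0, info, rep, oob);
    // on success Info>0 and oob.rmserror holds the OOB RMS estimate
*************************************************************************/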
40877 :
40878 :
40879 : /*************************************************************************
40880 : This function initializes temporaries needed for training session.
40881 :
40882 :
40883 : -- ALGLIB --
40884 : Copyright 01.07.2013 by Bochkanov Sergey
40885 : *************************************************************************/
40886 0 : static void mlptrain_initmlptrnsession(multilayerperceptron* networktrained,
40887 : ae_bool randomizenetwork,
40888 : mlptrainer* trainer,
40889 : smlptrnsession* session,
40890 : ae_state *_state)
40891 : {
40892 : ae_frame _frame_block;
40893 : ae_int_t nin;
40894 : ae_int_t nout;
40895 : ae_int_t wcount;
40896 : ae_int_t pcount;
40897 : ae_vector dummysubset;
40898 :
40899 0 : ae_frame_make(_state, &_frame_block);
40900 0 : memset(&dummysubset, 0, sizeof(dummysubset));
40901 0 : ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
40902 :
40903 :
40904 : /*
40905 : * Prepare network:
40906 : * * copy input network to Session.Network
40907 : * * re-initialize preprocessor and weights if RandomizeNetwork=True
40908 : */
40909 0 : mlpcopy(networktrained, &session->network, _state);
40910 0 : if( randomizenetwork )
40911 : {
40912 0 : ae_assert(trainer->datatype==0||trainer->datatype==1, "InitTemporaries: unexpected Trainer.DataType", _state);
40913 0 : if( trainer->datatype==0 )
40914 : {
40915 0 : mlpinitpreprocessorsubset(&session->network, &trainer->densexy, trainer->npoints, &dummysubset, -1, _state);
40916 : }
40917 0 : if( trainer->datatype==1 )
40918 : {
40919 0 : mlpinitpreprocessorsparsesubset(&session->network, &trainer->sparsexy, trainer->npoints, &dummysubset, -1, _state);
40920 : }
40921 0 : mlprandomize(&session->network, _state);
40922 0 : session->randomizenetwork = ae_true;
40923 : }
40924 : else
40925 : {
40926 0 : session->randomizenetwork = ae_false;
40927 : }
40928 :
40929 : /*
40930 : * Determine network geometry and initialize optimizer
40931 : */
40932 0 : mlpproperties(&session->network, &nin, &nout, &wcount, _state);
40933 0 : minlbfgscreate(wcount, ae_minint(wcount, trainer->lbfgsfactor, _state), &session->network.weights, &session->optimizer, _state);
40934 0 : minlbfgssetxrep(&session->optimizer, ae_true, _state);
40935 :
40936 : /*
40937 : * Create buffers
40938 : */
40939 0 : ae_vector_set_length(&session->wbuf0, wcount, _state);
40940 0 : ae_vector_set_length(&session->wbuf1, wcount, _state);
40941 :
40942 : /*
40943 : * Initialize session result
40944 : */
40945 0 : mlpexporttunableparameters(&session->network, &session->bestparameters, &pcount, _state);
40946 0 : session->bestrmserror = ae_maxrealnumber;
40947 0 : ae_frame_leave(_state);
40948 0 : }
40949 :
40950 :
40951 : /*************************************************************************
40952 : This function initializes a shared pool of training sessions: it seeds an empty pool, or resets sessions already recycled into an initialized pool.
40953 :
40954 : *************************************************************************/
40955 0 : static void mlptrain_initmlptrnsessions(multilayerperceptron* networktrained,
40956 : ae_bool randomizenetwork,
40957 : mlptrainer* trainer,
40958 : ae_shared_pool* sessions,
40959 : ae_state *_state)
40960 : {
40961 : ae_frame _frame_block;
40962 : ae_vector dummysubset;
40963 : smlptrnsession t;
40964 : smlptrnsession *p;
40965 : ae_smart_ptr _p;
40966 :
40967 0 : ae_frame_make(_state, &_frame_block);
40968 0 : memset(&dummysubset, 0, sizeof(dummysubset));
40969 0 : memset(&t, 0, sizeof(t));
40970 0 : memset(&_p, 0, sizeof(_p));
40971 0 : ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
40972 0 : _smlptrnsession_init(&t, _state, ae_true);
40973 0 : ae_smart_ptr_init(&_p, (void**)&p, _state, ae_true);
40974 :
40975 0 : if( ae_shared_pool_is_initialized(sessions) )
40976 : {
40977 :
40978 : /*
40979 : * Pool was already initialized.
40980 : * Clear sessions stored in the pool.
40981 : */
40982 0 : ae_shared_pool_first_recycled(sessions, &_p, _state);
40983 0 : while(p!=NULL)
40984 : {
40985 0 : ae_assert(mlpsamearchitecture(&p->network, networktrained, _state), "InitMLPTrnSessions: internal consistency error", _state);
40986 0 : p->bestrmserror = ae_maxrealnumber;
40987 0 : ae_shared_pool_next_recycled(sessions, &_p, _state);
40988 : }
40989 : }
40990 : else
40991 : {
40992 :
40993 : /*
40994 : * Prepare session and seed pool
40995 : */
40996 0 : mlptrain_initmlptrnsession(networktrained, randomizenetwork, trainer, &t, _state);
40997 0 : ae_shared_pool_set_seed(sessions, &t, sizeof(t), _smlptrnsession_init, _smlptrnsession_init_copy, _smlptrnsession_destroy, _state);
40998 : }
40999 0 : ae_frame_leave(_state);
41000 0 : }
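
/*************************************************************************
NOTE (added for clarity, not part of the library): the shared pool
prepared above follows a seed/retrieve/recycle protocol. A prototype
session is stored once with ae_shared_pool_set_seed(); each (possibly
parallel) worker then obtains a private copy and returns it for reuse,
exactly as mlptrain_mlptrainensemblex() does above:

    ae_shared_pool_retrieve(esessions, &_psession, _state); // get a copy
    // ...train using psession->network, psession->mlpsessions...
    ae_shared_pool_recycle(esessions, &_psession, _state);  // return it
*************************************************************************/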
41001 :
41002 :
41003 : /*************************************************************************
41004 : This function initializes temporaries needed for ensemble training.
41005 :
41006 : *************************************************************************/
41007 0 : static void mlptrain_initmlpetrnsession(multilayerperceptron* individualnetwork,
41008 : mlptrainer* trainer,
41009 : mlpetrnsession* session,
41010 : ae_state *_state)
41011 : {
41012 : ae_frame _frame_block;
41013 : ae_vector dummysubset;
41014 :
41015 0 : ae_frame_make(_state, &_frame_block);
41016 0 : memset(&dummysubset, 0, sizeof(dummysubset));
41017 0 : ae_vector_init(&dummysubset, 0, DT_INT, _state, ae_true);
41018 :
41019 :
41020 : /*
41021 : * Prepare session:
41022 : * * copy input network to Session.Network
41023 : * * initialize inner pool of MLP training sessions and subset buffers
41024 : */
41025 0 : mlpcopy(individualnetwork, &session->network, _state);
41026 0 : mlptrain_initmlptrnsessions(individualnetwork, ae_true, trainer, &session->mlpsessions, _state);
41027 0 : ivectorsetlengthatleast(&session->trnsubset, trainer->npoints, _state);
41028 0 : ivectorsetlengthatleast(&session->valsubset, trainer->npoints, _state);
41029 0 : ae_frame_leave(_state);
41030 0 : }
41031 :
41032 :
41033 : /*************************************************************************
41034 : This function initializes a shared pool of ensemble training sessions (seeding it on first use).
41035 :
41036 : *************************************************************************/
41037 0 : static void mlptrain_initmlpetrnsessions(multilayerperceptron* individualnetwork,
41038 : mlptrainer* trainer,
41039 : ae_shared_pool* sessions,
41040 : ae_state *_state)
41041 : {
41042 : ae_frame _frame_block;
41043 : mlpetrnsession t;
41044 :
41045 0 : ae_frame_make(_state, &_frame_block);
41046 0 : memset(&t, 0, sizeof(t));
41047 0 : _mlpetrnsession_init(&t, _state, ae_true);
41048 :
41049 0 : if( !ae_shared_pool_is_initialized(sessions) )
41050 : {
41051 0 : mlptrain_initmlpetrnsession(individualnetwork, trainer, &t, _state);
41052 0 : ae_shared_pool_set_seed(sessions, &t, sizeof(t), _mlpetrnsession_init, _mlpetrnsession_init_copy, _mlpetrnsession_destroy, _state);
41053 : }
41054 0 : ae_frame_leave(_state);
41055 0 : }
41056 :
41057 :
41058 0 : void _mlpreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
41059 : {
41060 0 : mlpreport *p = (mlpreport*)_p;
41061 0 : ae_touch_ptr((void*)p);
41062 0 : }
41063 :
41064 :
41065 0 : void _mlpreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41066 : {
41067 0 : mlpreport *dst = (mlpreport*)_dst;
41068 0 : mlpreport *src = (mlpreport*)_src;
41069 0 : dst->relclserror = src->relclserror;
41070 0 : dst->avgce = src->avgce;
41071 0 : dst->rmserror = src->rmserror;
41072 0 : dst->avgerror = src->avgerror;
41073 0 : dst->avgrelerror = src->avgrelerror;
41074 0 : dst->ngrad = src->ngrad;
41075 0 : dst->nhess = src->nhess;
41076 0 : dst->ncholesky = src->ncholesky;
41077 0 : }
41078 :
41079 :
41080 0 : void _mlpreport_clear(void* _p)
41081 : {
41082 0 : mlpreport *p = (mlpreport*)_p;
41083 0 : ae_touch_ptr((void*)p);
41084 0 : }
41085 :
41086 :
41087 0 : void _mlpreport_destroy(void* _p)
41088 : {
41089 0 : mlpreport *p = (mlpreport*)_p;
41090 0 : ae_touch_ptr((void*)p);
41091 0 : }
41092 :
41093 :
41094 0 : void _mlpcvreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
41095 : {
41096 0 : mlpcvreport *p = (mlpcvreport*)_p;
41097 0 : ae_touch_ptr((void*)p);
41098 0 : }
41099 :
41100 :
41101 0 : void _mlpcvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41102 : {
41103 0 : mlpcvreport *dst = (mlpcvreport*)_dst;
41104 0 : mlpcvreport *src = (mlpcvreport*)_src;
41105 0 : dst->relclserror = src->relclserror;
41106 0 : dst->avgce = src->avgce;
41107 0 : dst->rmserror = src->rmserror;
41108 0 : dst->avgerror = src->avgerror;
41109 0 : dst->avgrelerror = src->avgrelerror;
41110 0 : }
41111 :
41112 :
41113 0 : void _mlpcvreport_clear(void* _p)
41114 : {
41115 0 : mlpcvreport *p = (mlpcvreport*)_p;
41116 0 : ae_touch_ptr((void*)p);
41117 0 : }
41118 :
41119 :
41120 0 : void _mlpcvreport_destroy(void* _p)
41121 : {
41122 0 : mlpcvreport *p = (mlpcvreport*)_p;
41123 0 : ae_touch_ptr((void*)p);
41124 0 : }
41125 :
41126 :
41127 0 : void _smlptrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic)
41128 : {
41129 0 : smlptrnsession *p = (smlptrnsession*)_p;
41130 0 : ae_touch_ptr((void*)p);
41131 0 : ae_vector_init(&p->bestparameters, 0, DT_REAL, _state, make_automatic);
41132 0 : _multilayerperceptron_init(&p->network, _state, make_automatic);
41133 0 : _minlbfgsstate_init(&p->optimizer, _state, make_automatic);
41134 0 : _minlbfgsreport_init(&p->optimizerrep, _state, make_automatic);
41135 0 : ae_vector_init(&p->wbuf0, 0, DT_REAL, _state, make_automatic);
41136 0 : ae_vector_init(&p->wbuf1, 0, DT_REAL, _state, make_automatic);
41137 0 : ae_vector_init(&p->allminibatches, 0, DT_INT, _state, make_automatic);
41138 0 : ae_vector_init(&p->currentminibatch, 0, DT_INT, _state, make_automatic);
41139 0 : _rcommstate_init(&p->rstate, _state, make_automatic);
41140 0 : _hqrndstate_init(&p->generator, _state, make_automatic);
41141 0 : }
41142 :
41143 :
41144 0 : void _smlptrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41145 : {
41146 0 : smlptrnsession *dst = (smlptrnsession*)_dst;
41147 0 : smlptrnsession *src = (smlptrnsession*)_src;
41148 0 : ae_vector_init_copy(&dst->bestparameters, &src->bestparameters, _state, make_automatic);
41149 0 : dst->bestrmserror = src->bestrmserror;
41150 0 : dst->randomizenetwork = src->randomizenetwork;
41151 0 : _multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
41152 0 : _minlbfgsstate_init_copy(&dst->optimizer, &src->optimizer, _state, make_automatic);
41153 0 : _minlbfgsreport_init_copy(&dst->optimizerrep, &src->optimizerrep, _state, make_automatic);
41154 0 : ae_vector_init_copy(&dst->wbuf0, &src->wbuf0, _state, make_automatic);
41155 0 : ae_vector_init_copy(&dst->wbuf1, &src->wbuf1, _state, make_automatic);
41156 0 : ae_vector_init_copy(&dst->allminibatches, &src->allminibatches, _state, make_automatic);
41157 0 : ae_vector_init_copy(&dst->currentminibatch, &src->currentminibatch, _state, make_automatic);
41158 0 : _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
41159 0 : dst->algoused = src->algoused;
41160 0 : dst->minibatchsize = src->minibatchsize;
41161 0 : _hqrndstate_init_copy(&dst->generator, &src->generator, _state, make_automatic);
41162 0 : }
41163 :
41164 :
41165 0 : void _smlptrnsession_clear(void* _p)
41166 : {
41167 0 : smlptrnsession *p = (smlptrnsession*)_p;
41168 0 : ae_touch_ptr((void*)p);
41169 0 : ae_vector_clear(&p->bestparameters);
41170 0 : _multilayerperceptron_clear(&p->network);
41171 0 : _minlbfgsstate_clear(&p->optimizer);
41172 0 : _minlbfgsreport_clear(&p->optimizerrep);
41173 0 : ae_vector_clear(&p->wbuf0);
41174 0 : ae_vector_clear(&p->wbuf1);
41175 0 : ae_vector_clear(&p->allminibatches);
41176 0 : ae_vector_clear(&p->currentminibatch);
41177 0 : _rcommstate_clear(&p->rstate);
41178 0 : _hqrndstate_clear(&p->generator);
41179 0 : }
41180 :
41181 :
41182 0 : void _smlptrnsession_destroy(void* _p)
41183 : {
41184 0 : smlptrnsession *p = (smlptrnsession*)_p;
41185 0 : ae_touch_ptr((void*)p);
41186 0 : ae_vector_destroy(&p->bestparameters);
41187 0 : _multilayerperceptron_destroy(&p->network);
41188 0 : _minlbfgsstate_destroy(&p->optimizer);
41189 0 : _minlbfgsreport_destroy(&p->optimizerrep);
41190 0 : ae_vector_destroy(&p->wbuf0);
41191 0 : ae_vector_destroy(&p->wbuf1);
41192 0 : ae_vector_destroy(&p->allminibatches);
41193 0 : ae_vector_destroy(&p->currentminibatch);
41194 0 : _rcommstate_destroy(&p->rstate);
41195 0 : _hqrndstate_destroy(&p->generator);
41196 0 : }
41197 :
41198 :
41199 0 : void _mlpetrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic)
41200 : {
41201 0 : mlpetrnsession *p = (mlpetrnsession*)_p;
41202 0 : ae_touch_ptr((void*)p);
41203 0 : ae_vector_init(&p->trnsubset, 0, DT_INT, _state, make_automatic);
41204 0 : ae_vector_init(&p->valsubset, 0, DT_INT, _state, make_automatic);
41205 0 : ae_shared_pool_init(&p->mlpsessions, _state, make_automatic);
41206 0 : _mlpreport_init(&p->mlprep, _state, make_automatic);
41207 0 : _multilayerperceptron_init(&p->network, _state, make_automatic);
41208 0 : }
41209 :
41210 :
41211 0 : void _mlpetrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41212 : {
41213 0 : mlpetrnsession *dst = (mlpetrnsession*)_dst;
41214 0 : mlpetrnsession *src = (mlpetrnsession*)_src;
41215 0 : ae_vector_init_copy(&dst->trnsubset, &src->trnsubset, _state, make_automatic);
41216 0 : ae_vector_init_copy(&dst->valsubset, &src->valsubset, _state, make_automatic);
41217 0 : ae_shared_pool_init_copy(&dst->mlpsessions, &src->mlpsessions, _state, make_automatic);
41218 0 : _mlpreport_init_copy(&dst->mlprep, &src->mlprep, _state, make_automatic);
41219 0 : _multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
41220 0 : }
41221 :
41222 :
41223 0 : void _mlpetrnsession_clear(void* _p)
41224 : {
41225 0 : mlpetrnsession *p = (mlpetrnsession*)_p;
41226 0 : ae_touch_ptr((void*)p);
41227 0 : ae_vector_clear(&p->trnsubset);
41228 0 : ae_vector_clear(&p->valsubset);
41229 0 : ae_shared_pool_clear(&p->mlpsessions);
41230 0 : _mlpreport_clear(&p->mlprep);
41231 0 : _multilayerperceptron_clear(&p->network);
41232 0 : }
41233 :
41234 :
41235 0 : void _mlpetrnsession_destroy(void* _p)
41236 : {
41237 0 : mlpetrnsession *p = (mlpetrnsession*)_p;
41238 0 : ae_touch_ptr((void*)p);
41239 0 : ae_vector_destroy(&p->trnsubset);
41240 0 : ae_vector_destroy(&p->valsubset);
41241 0 : ae_shared_pool_destroy(&p->mlpsessions);
41242 0 : _mlpreport_destroy(&p->mlprep);
41243 0 : _multilayerperceptron_destroy(&p->network);
41244 0 : }
41245 :
41246 :
41247 0 : void _mlptrainer_init(void* _p, ae_state *_state, ae_bool make_automatic)
41248 : {
41249 0 : mlptrainer *p = (mlptrainer*)_p;
41250 0 : ae_touch_ptr((void*)p);
41251 0 : ae_matrix_init(&p->densexy, 0, 0, DT_REAL, _state, make_automatic);
41252 0 : _sparsematrix_init(&p->sparsexy, _state, make_automatic);
41253 0 : _smlptrnsession_init(&p->session, _state, make_automatic);
41254 0 : ae_vector_init(&p->subset, 0, DT_INT, _state, make_automatic);
41255 0 : ae_vector_init(&p->valsubset, 0, DT_INT, _state, make_automatic);
41256 0 : }
41257 :
41258 :
41259 0 : void _mlptrainer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41260 : {
41261 0 : mlptrainer *dst = (mlptrainer*)_dst;
41262 0 : mlptrainer *src = (mlptrainer*)_src;
41263 0 : dst->nin = src->nin;
41264 0 : dst->nout = src->nout;
41265 0 : dst->rcpar = src->rcpar;
41266 0 : dst->lbfgsfactor = src->lbfgsfactor;
41267 0 : dst->decay = src->decay;
41268 0 : dst->wstep = src->wstep;
41269 0 : dst->maxits = src->maxits;
41270 0 : dst->datatype = src->datatype;
41271 0 : dst->npoints = src->npoints;
41272 0 : ae_matrix_init_copy(&dst->densexy, &src->densexy, _state, make_automatic);
41273 0 : _sparsematrix_init_copy(&dst->sparsexy, &src->sparsexy, _state, make_automatic);
41274 0 : _smlptrnsession_init_copy(&dst->session, &src->session, _state, make_automatic);
41275 0 : dst->ngradbatch = src->ngradbatch;
41276 0 : ae_vector_init_copy(&dst->subset, &src->subset, _state, make_automatic);
41277 0 : dst->subsetsize = src->subsetsize;
41278 0 : ae_vector_init_copy(&dst->valsubset, &src->valsubset, _state, make_automatic);
41279 0 : dst->valsubsetsize = src->valsubsetsize;
41280 0 : dst->algokind = src->algokind;
41281 0 : dst->minibatchsize = src->minibatchsize;
41282 0 : }
41283 :
41284 :
41285 0 : void _mlptrainer_clear(void* _p)
41286 : {
41287 0 : mlptrainer *p = (mlptrainer*)_p;
41288 0 : ae_touch_ptr((void*)p);
41289 0 : ae_matrix_clear(&p->densexy);
41290 0 : _sparsematrix_clear(&p->sparsexy);
41291 0 : _smlptrnsession_clear(&p->session);
41292 0 : ae_vector_clear(&p->subset);
41293 0 : ae_vector_clear(&p->valsubset);
41294 0 : }
41295 :
41296 :
41297 0 : void _mlptrainer_destroy(void* _p)
41298 : {
41299 0 : mlptrainer *p = (mlptrainer*)_p;
41300 0 : ae_touch_ptr((void*)p);
41301 0 : ae_matrix_destroy(&p->densexy);
41302 0 : _sparsematrix_destroy(&p->sparsexy);
41303 0 : _smlptrnsession_destroy(&p->session);
41304 0 : ae_vector_destroy(&p->subset);
41305 0 : ae_vector_destroy(&p->valsubset);
41306 0 : }
41307 :
41308 :
41309 0 : void _mlpparallelizationcv_init(void* _p, ae_state *_state, ae_bool make_automatic)
41310 : {
41311 0 : mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
41312 0 : ae_touch_ptr((void*)p);
41313 0 : _multilayerperceptron_init(&p->network, _state, make_automatic);
41314 0 : _mlpreport_init(&p->rep, _state, make_automatic);
41315 0 : ae_vector_init(&p->subset, 0, DT_INT, _state, make_automatic);
41316 0 : ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
41317 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
41318 0 : ae_shared_pool_init(&p->trnpool, _state, make_automatic);
41319 0 : }
41320 :
41321 :
41322 0 : void _mlpparallelizationcv_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
41323 : {
41324 0 : mlpparallelizationcv *dst = (mlpparallelizationcv*)_dst;
41325 0 : mlpparallelizationcv *src = (mlpparallelizationcv*)_src;
41326 0 : _multilayerperceptron_init_copy(&dst->network, &src->network, _state, make_automatic);
41327 0 : _mlpreport_init_copy(&dst->rep, &src->rep, _state, make_automatic);
41328 0 : ae_vector_init_copy(&dst->subset, &src->subset, _state, make_automatic);
41329 0 : dst->subsetsize = src->subsetsize;
41330 0 : ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
41331 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
41332 0 : dst->ngrad = src->ngrad;
41333 0 : ae_shared_pool_init_copy(&dst->trnpool, &src->trnpool, _state, make_automatic);
41334 0 : }
41335 :
41336 :
41337 0 : void _mlpparallelizationcv_clear(void* _p)
41338 : {
41339 0 : mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
41340 0 : ae_touch_ptr((void*)p);
41341 0 : _multilayerperceptron_clear(&p->network);
41342 0 : _mlpreport_clear(&p->rep);
41343 0 : ae_vector_clear(&p->subset);
41344 0 : ae_vector_clear(&p->xyrow);
41345 0 : ae_vector_clear(&p->y);
41346 0 : ae_shared_pool_clear(&p->trnpool);
41347 0 : }
41348 :
41349 :
41350 0 : void _mlpparallelizationcv_destroy(void* _p)
41351 : {
41352 0 : mlpparallelizationcv *p = (mlpparallelizationcv*)_p;
41353 0 : ae_touch_ptr((void*)p);
41354 0 : _multilayerperceptron_destroy(&p->network);
41355 0 : _mlpreport_destroy(&p->rep);
41356 0 : ae_vector_destroy(&p->subset);
41357 0 : ae_vector_destroy(&p->xyrow);
41358 0 : ae_vector_destroy(&p->y);
41359 0 : ae_shared_pool_destroy(&p->trnpool);
41360 0 : }
41361 :
41362 :
41363 : #endif
41364 : #if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
41365 :
41366 :
41367 : /*************************************************************************
41368 : This function initializes the clusterizer object. A newly initialized
41369 : object is empty, i.e. it does not contain a dataset. Use it as follows:
41370 : 1. creation
41371 : 2. dataset is added with ClusterizerSetPoints()
41372 : 3. additional parameters are set
41373 : 4. clusterization is performed with one of the clustering functions (see the usage sketch after this function)
41374 :
41375 : -- ALGLIB --
41376 : Copyright 10.07.2012 by Bochkanov Sergey
41377 : *************************************************************************/
41378 0 : void clusterizercreate(clusterizerstate* s, ae_state *_state)
41379 : {
41380 :
41381 0 : _clusterizerstate_clear(s);
41382 :
41383 0 : s->npoints = 0;
41384 0 : s->nfeatures = 0;
41385 0 : s->disttype = 2;
41386 0 : s->ahcalgo = 0;
41387 0 : s->kmeansrestarts = 1;
41388 0 : s->kmeansmaxits = 0;
41389 0 : s->kmeansinitalgo = 0;
41390 0 : s->kmeansdbgnoits = ae_false;
41391 0 : s->seed = 1;
41392 0 : kmeansinitbuf(&s->kmeanstmp, _state);
41393 0 : }
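
/*************************************************************************
USAGE SKETCH (illustrative, not part of the library). The workflow listed
above, written against the public ALGLIB C++ interface; dataset values,
the Euclidean distance (DistType=2) and single linkage (Algo=1) are
assumptions made for the example only:

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);  // DistType=2 (Euclidean)
    alglib::clusterizersetahcalgo(s, 1);     // single linkage
    alglib::clusterizerrunahc(s, rep);
    // rep.terminationtype>0 on success; merges are in rep.z
    // (see description of the AHCReport structure)
*************************************************************************/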
41394 :
41395 :
41396 : /*************************************************************************
41397 : This function adds a dataset to the clusterizer structure.
41398 :
41399 : This function overrides all previous calls of ClusterizerSetPoints() or
41400 : ClusterizerSetDistances().
41401 :
41402 : INPUT PARAMETERS:
41403 : S - clusterizer state, initialized by ClusterizerCreate()
41404 : XY - array[NPoints,NFeatures], dataset
41405 : NPoints - number of points, >=0
41406 : NFeatures- number of features, >=1
41407 : DistType- distance function:
41408 : * 0 Chebyshev distance (L-inf norm)
41409 : * 1 city block distance (L1 norm)
41410 : * 2 Euclidean distance (L2 norm), non-squared
41411 : * 10 Pearson correlation:
41412 : dist(a,b) = 1-corr(a,b)
41413 : * 11 Absolute Pearson correlation:
41414 : dist(a,b) = 1-|corr(a,b)|
41415 : * 12 Uncentered Pearson correlation (cosine of the angle):
41416 : dist(a,b) = a'*b/(|a|*|b|)
41417 : * 13 Absolute uncentered Pearson correlation
41418 : dist(a,b) = |a'*b|/(|a|*|b|)
41419 : * 20 Spearman rank correlation:
41420 : dist(a,b) = 1-rankcorr(a,b)
41421 : * 21 Absolute Spearman rank correlation
41422 : dist(a,b) = 1-|rankcorr(a,b)|
41423 :
41424 : NOTE 1: different distance functions have different performance penalties:
41425 : * Euclidean or Pearson correlation distances are the fastest ones
41426 : * Spearman correlation distance function is a bit slower
41427 : * city block and Chebyshev distances are an order of magnitude slower
41428 :
41429 : The reason behind the difference in performance is that correlation-based
41430 : distance functions are computed using optimized linear algebra kernels,
41431 : while Chebyshev and city block distance functions are computed using
41432 : simple nested loops with two branches at each iteration.
41433 :
41434 : NOTE 2: different clustering algorithms have different limitations:
41435 : * agglomerative hierarchical clustering algorithms may be used with
41436 : any kind of distance metric
41437 : * k-means++ clustering algorithm may be used only with Euclidean
41438 : distance function
41439 : Thus, the list of clustering algorithms you may use depends on the
41440 : distance function you specify when you set your dataset.
41441 :
41442 : -- ALGLIB --
41443 : Copyright 10.07.2012 by Bochkanov Sergey
41444 : *************************************************************************/
41445 0 : void clusterizersetpoints(clusterizerstate* s,
41446 : /* Real */ ae_matrix* xy,
41447 : ae_int_t npoints,
41448 : ae_int_t nfeatures,
41449 : ae_int_t disttype,
41450 : ae_state *_state)
41451 : {
41452 : ae_int_t i;
41453 :
41454 :
41455 0 : ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerSetPoints: incorrect DistType", _state);
41456 0 : ae_assert(npoints>=0, "ClusterizerSetPoints: NPoints<0", _state);
41457 0 : ae_assert(nfeatures>=1, "ClusterizerSetPoints: NFeatures<1", _state);
41458 0 : ae_assert(xy->rows>=npoints, "ClusterizerSetPoints: Rows(XY)<NPoints", _state);
41459 0 : ae_assert(xy->cols>=nfeatures, "ClusterizerSetPoints: Cols(XY)<NFeatures", _state);
41460 0 : ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerSetPoints: XY contains NAN/INF", _state);
41461 0 : s->npoints = npoints;
41462 0 : s->nfeatures = nfeatures;
41463 0 : s->disttype = disttype;
41464 0 : rmatrixsetlengthatleast(&s->xy, npoints, nfeatures, _state);
41465 0 : for(i=0; i<=npoints-1; i++)
41466 : {
41467 0 : ae_v_move(&s->xy.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1));
41468 : }
41469 0 : }
41470 :
41471 :
41472 : /*************************************************************************
41473 : This function adds a dataset given by its distance matrix to the
41474 : clusterizer structure. Note that the dataset itself is not given
41475 : explicitly - only the distance matrix is provided.
41476 :
41477 : This function overrides all previous calls of ClusterizerSetPoints() or
41478 : ClusterizerSetDistances().
41479 :
41480 : INPUT PARAMETERS:
41481 : S - clusterizer state, initialized by ClusterizerCreate()
41482 : D - array[NPoints,NPoints], distance matrix given by its upper
41483 : or lower triangle (main diagonal is ignored because its
41484 : entries are expected to be zero).
41485 : NPoints - number of points
41486 : IsUpper - whether upper or lower triangle of D is given.
41487 :
41488 : NOTE 1: different clustering algorithms have different limitations:
41489 : * agglomerative hierarchical clustering algorithms may be used with
41490 : any kind of distance metric, including one which is given by
41491 : distance matrix
41492 : * k-means++ clustering algorithm may be used only with Euclidean
41493 : distance function and explicitly given points - it cannot be
41494 : used with a dataset given by a distance matrix
41495 : Thus, if you call this function, you will be unable to use the
41496 : k-means clustering algorithm to process your problem.
41497 :
41498 : -- ALGLIB --
41499 : Copyright 10.07.2012 by Bochkanov Sergey
41500 : *************************************************************************/
41501 0 : void clusterizersetdistances(clusterizerstate* s,
41502 : /* Real */ ae_matrix* d,
41503 : ae_int_t npoints,
41504 : ae_bool isupper,
41505 : ae_state *_state)
41506 : {
41507 : ae_int_t i;
41508 : ae_int_t j;
41509 : ae_int_t j0;
41510 : ae_int_t j1;
41511 :
41512 :
41513 0 : ae_assert(npoints>=0, "ClusterizerSetDistances: NPoints<0", _state);
41514 0 : ae_assert(d->rows>=npoints, "ClusterizerSetDistances: Rows(D)<NPoints", _state);
41515 0 : ae_assert(d->cols>=npoints, "ClusterizerSetDistances: Cols(D)<NPoints", _state);
41516 0 : s->npoints = npoints;
41517 0 : s->nfeatures = 0;
41518 0 : s->disttype = -1;
41519 0 : rmatrixsetlengthatleast(&s->d, npoints, npoints, _state);
41520 0 : for(i=0; i<=npoints-1; i++)
41521 : {
41522 0 : if( isupper )
41523 : {
41524 0 : j0 = i+1;
41525 0 : j1 = npoints-1;
41526 : }
41527 : else
41528 : {
41529 0 : j0 = 0;
41530 0 : j1 = i-1;
41531 : }
41532 0 : for(j=j0; j<=j1; j++)
41533 : {
41534 0 : ae_assert(ae_isfinite(d->ptr.pp_double[i][j], _state)&&ae_fp_greater_eq(d->ptr.pp_double[i][j],(double)(0)), "ClusterizerSetDistances: D contains infinite, NAN or negative elements", _state);
41535 0 : s->d.ptr.pp_double[i][j] = d->ptr.pp_double[i][j];
41536 0 : s->d.ptr.pp_double[j][i] = d->ptr.pp_double[i][j];
41537 : }
41538 0 : s->d.ptr.pp_double[i][i] = (double)(0);
41539 : }
41540 0 : }
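
/*************************************************************************
USAGE SKETCH (illustrative, not part of the library). Clustering from a
user-supplied distance matrix; values are illustrative. Only agglomerative
hierarchical clustering can be used in this mode - k-means would report
failure because no explicit points are available:

    alglib::clusterizerstate s;
    alglib::ahcreport rep;
    alglib::real_2d_array d = "[[0,1,3],[1,0,2],[3,2,0]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetdistances(s, d, true); // upper triangle is read
    alglib::clusterizerrunahc(s, rep);
*************************************************************************/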
41541 :
41542 :
41543 : /*************************************************************************
41544 : This function sets agglomerative hierarchical clustering algorithm
41545 :
41546 : INPUT PARAMETERS:
41547 : S - clusterizer state, initialized by ClusterizerCreate()
41548 : Algo - algorithm type:
41549 : * 0 complete linkage (default algorithm)
41550 : * 1 single linkage
41551 : * 2 unweighted average linkage
41552 : * 3 weighted average linkage
41553 : * 4 Ward's method
41554 :
41555 : NOTE: Ward's method works correctly only with Euclidean distance, which
41556 : is why the algorithm will return a negative termination code
41557 : (failure) for any other distance type.
41558 :
41559 : It is possible, however, to use this method with user-supplied
41560 : distance matrix. It is your responsibility to pass one which was
41561 : calculated with Euclidean distance function.
41562 :
41563 : -- ALGLIB --
41564 : Copyright 10.07.2012 by Bochkanov Sergey
41565 : *************************************************************************/
41566 0 : void clusterizersetahcalgo(clusterizerstate* s,
41567 : ae_int_t algo,
41568 : ae_state *_state)
41569 : {
41570 :
41571 :
41572 0 : ae_assert((((algo==0||algo==1)||algo==2)||algo==3)||algo==4, "ClusterizerSetHCAlgo: incorrect algorithm type", _state);
41573 0 : s->ahcalgo = algo;
41574 0 : }
41575 :
41576 :
41577 : /*************************************************************************
41578 : This function sets k-means properties: number of restarts and maximum
41579 : number of iterations per one run.
41580 :
41581 : INPUT PARAMETERS:
41582 : S - clusterizer state, initialized by ClusterizerCreate()
41583 : Restarts- restarts count, >=1.
41584 : k-means++ algorithm performs several restarts and chooses
41585 : the best set of centers (the one with minimum sum of squared distances).
41586 : MaxIts - maximum number of k-means iterations performed during one
41587 : run. >=0; a zero value means that the algorithm performs an
41588 : unlimited number of iterations.
41589 :
41590 : -- ALGLIB --
41591 : Copyright 10.07.2012 by Bochkanov Sergey
41592 : *************************************************************************/
41593 0 : void clusterizersetkmeanslimits(clusterizerstate* s,
41594 : ae_int_t restarts,
41595 : ae_int_t maxits,
41596 : ae_state *_state)
41597 : {
41598 :
41599 :
41600 0 : ae_assert(restarts>=1, "ClusterizerSetKMeansLimits: Restarts<=0", _state);
41601 0 : ae_assert(maxits>=0, "ClusterizerSetKMeansLimits: MaxIts<0", _state);
41602 0 : s->kmeansrestarts = restarts;
41603 0 : s->kmeansmaxits = maxits;
41604 0 : }
41605 :
41606 :
41607 : /*************************************************************************
41608 : This function sets k-means initialization algorithm. Several different
41609 : algorithms can be chosen, including k-means++.
41610 :
41611 : INPUT PARAMETERS:
41612 : S - clusterizer state, initialized by ClusterizerCreate()
41613 : InitAlgo- initialization algorithm:
41614 : * 0 automatic selection (different versions of ALGLIB
41615 : may select different algorithms)
41616 : * 1 random initialization
41617 : * 2 k-means++ initialization (best quality of initial
41618 : centers, but long non-parallelizable initialization
41619 : phase with bad cache locality)
41620 : * 3 "fast-greedy" algorithm with efficient, easy to
41621 : parallelize initialization. Quality of initial centers
41622 : is somewhat worse than that of k-means++. This
41623 : algorithm is the default one in the current version of
41624 : ALGLIB.
41625 : *-1 "debug" algorithm which always selects the first K rows
41626 : of the dataset; this algorithm is used for debugging
41627 : purposes only. Do not use it in industrial code!
41628 :
41629 : -- ALGLIB --
41630 : Copyright 21.01.2015 by Bochkanov Sergey
41631 : *************************************************************************/
41632 0 : void clusterizersetkmeansinit(clusterizerstate* s,
41633 : ae_int_t initalgo,
41634 : ae_state *_state)
41635 : {
41636 :
41637 :
41638 0 : ae_assert(initalgo>=-1&&initalgo<=3, "ClusterizerSetKMeansInit: InitAlgo is incorrect", _state);
41639 0 : s->kmeansinitalgo = initalgo;
41640 0 : }
41641 :
41642 :
41643 : /*************************************************************************
41644 : This function sets the seed which is used to initialize the internal RNG.
41645 : By default, a deterministic seed is used - the same for each run of the
41646 : clusterizer. If you specify a non-deterministic seed value, then some
41647 : algorithms which depend on random initialization (in the current version:
41648 : k-means) may return slightly different results after each run.
41649 :
41650 : INPUT PARAMETERS:
41651 : S - clusterizer state, initialized by ClusterizerCreate()
41652 : Seed - seed:
41653 : * positive values = use deterministic seed for each run of
41654 : algorithms which depend on random initialization
41655 : * zero or negative values = use non-deterministic seed
41656 :
41657 : -- ALGLIB --
41658 : Copyright 08.06.2017 by Bochkanov Sergey
41659 : *************************************************************************/
41660 0 : void clusterizersetseed(clusterizerstate* s,
41661 : ae_int_t seed,
41662 : ae_state *_state)
41663 : {
41664 :
41665 :
41666 0 : s->seed = seed;
41667 0 : }
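
/*************************************************************************
USAGE SKETCH (illustrative, not part of the library). A hedged example
combining the k-means tuning calls above (restart/iteration limits,
initialization algorithm, RNG seed); dataset and parameter values are
assumptions made for the example only:

    alglib::clusterizerstate s;
    alglib::kmeansreport rep;
    alglib::real_2d_array xy = "[[1,1],[1,2],[4,1],[2,3],[4,1.5]]";
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);      // k-means needs Euclidean
    alglib::clusterizersetkmeanslimits(s, 5, 0); // 5 restarts, no iter. cap
    alglib::clusterizersetkmeansinit(s, 2);      // k-means++ initialization
    alglib::clusterizersetseed(s, 0);            // non-deterministic seed
    alglib::clusterizerrunkmeans(s, 2, rep);     // K=2 clusters
*************************************************************************/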
41668 :
41669 :
41670 : /*************************************************************************
41671 : This function performs agglomerative hierarchical clustering
41672 :
41673 : ! COMMERCIAL EDITION OF ALGLIB:
41674 : !
41675 : ! Commercial Edition of ALGLIB includes following important improvements
41676 : ! of this function:
41677 : ! * high-performance native backend with same C# interface (C# version)
41678 : ! * multithreading support (C++ and C# versions)
41679 : ! * hardware vendor (Intel) implementations of linear algebra primitives
41680 : ! (C++ and C# versions, x86/x64 platform)
41681 : !
41682 : ! We recommend you to read 'Working with commercial version' section of
41683 : ! ALGLIB Reference Manual in order to find out how to use performance-
41684 : ! related features provided by commercial edition of ALGLIB.
41685 :
41686 : NOTE: Agglomerative hierarchical clustering algorithm has two phases:
41687 : distance matrix calculation and clustering itself. Only first phase
41688 : (distance matrix calculation) is accelerated by Intel MKL and
41689 : multithreading. Thus, acceleration is significant only for medium or
41690 : high-dimensional problems.
41691 :
41692 : Although activating multithreading gives some speedup over single-
41693 : threaded execution, you should not expect nearly-linear scaling
41694 : with respect to the number of cores.
41695 :
41696 : INPUT PARAMETERS:
41697 : S - clusterizer state, initialized by ClusterizerCreate()
41698 :
41699 : OUTPUT PARAMETERS:
41700 : Rep - clustering results; see description of AHCReport
41701 : structure for more information.
41702 :
41703 : NOTE 1: hierarchical clustering algorithms require large amounts of memory.
41704 : In particular, this implementation needs sizeof(double)*NPoints^2
41705 : bytes, which are used to store the distance matrix. When working
41706 : with a user-supplied matrix, this amount is doubled (we have to
41707 : store the original matrix and to work with its copy).
41708 :
41709 : For example, a problem with 10000 points would require 800MB of RAM,
41710 : even when working in a 1-dimensional space.
41711 :
41712 : -- ALGLIB --
41713 : Copyright 10.07.2012 by Bochkanov Sergey
41714 : *************************************************************************/
41715 0 : void clusterizerrunahc(clusterizerstate* s,
41716 : ahcreport* rep,
41717 : ae_state *_state)
41718 : {
41719 : ae_int_t npoints;
41720 : ae_int_t nfeatures;
41721 :
41722 0 : _ahcreport_clear(rep);
41723 :
41724 0 : npoints = s->npoints;
41725 0 : nfeatures = s->nfeatures;
41726 :
41727 : /*
41728 : * Fill Rep.NPoints, quick exit when NPoints<=1
41729 : */
41730 0 : rep->npoints = npoints;
41731 0 : if( npoints==0 )
41732 : {
41733 0 : ae_vector_set_length(&rep->p, 0, _state);
41734 0 : ae_matrix_set_length(&rep->z, 0, 0, _state);
41735 0 : ae_matrix_set_length(&rep->pz, 0, 0, _state);
41736 0 : ae_matrix_set_length(&rep->pm, 0, 0, _state);
41737 0 : ae_vector_set_length(&rep->mergedist, 0, _state);
41738 0 : rep->terminationtype = 1;
41739 0 : return;
41740 : }
41741 0 : if( npoints==1 )
41742 : {
41743 0 : ae_vector_set_length(&rep->p, 1, _state);
41744 0 : ae_matrix_set_length(&rep->z, 0, 0, _state);
41745 0 : ae_matrix_set_length(&rep->pz, 0, 0, _state);
41746 0 : ae_matrix_set_length(&rep->pm, 0, 0, _state);
41747 0 : ae_vector_set_length(&rep->mergedist, 0, _state);
41748 0 : rep->p.ptr.p_int[0] = 0;
41749 0 : rep->terminationtype = 1;
41750 0 : return;
41751 : }
41752 :
41753 : /*
41754 : * More than one point
41755 : */
41756 0 : if( s->disttype==-1 )
41757 : {
41758 :
41759 : /*
41760 : * Run clusterizer with user-supplied distance matrix
41761 : */
41762 0 : clustering_clusterizerrunahcinternal(s, &s->d, rep, _state);
41763 0 : return;
41764 : }
41765 : else
41766 : {
41767 :
41768 : /*
41769 : * Check combination of AHC algo and distance type
41770 : */
41771 0 : if( s->ahcalgo==4&&s->disttype!=2 )
41772 : {
41773 0 : rep->terminationtype = -5;
41774 0 : return;
41775 : }
41776 :
41777 : /*
41778 : * Build distance matrix D.
41779 : */
41780 0 : clusterizergetdistancesbuf(&s->distbuf, &s->xy, npoints, nfeatures, s->disttype, &s->tmpd, _state);
41781 :
41782 : /*
41783 : * Run clusterizer
41784 : */
41785 0 : clustering_clusterizerrunahcinternal(s, &s->tmpd, rep, _state);
41786 0 : return;
41787 : }
41788 : }
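/*************************************************************************
Usage sketch (editorial addition, not part of the generated source): a
minimal AHC call sequence. The setup calls clusterizercreate() and
clusterizersetpoints() are assumed to be available in this unit with
their usual signatures; treat this as a hedged illustration rather than
the canonical API reference.

    ae_state _state;
    clusterizerstate s;
    ahcreport rep;

    ae_state_init(&_state);
    _clusterizerstate_init(&s, &_state, ae_true);
    _ahcreport_init(&rep, &_state, ae_true);
    clusterizercreate(&s, &_state);
    clusterizersetpoints(&s, &xy, npoints, nfeatures, 2, &_state); // DistType=2: Euclidean
    clusterizerrunahc(&s, &rep, &_state);
    // rep.terminationtype>0 on success; rep.z/pz/pm describe the merges
*************************************************************************/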
41789 :
41790 :
41791 : /*************************************************************************
41792 : This function performs clustering by k-means++ algorithm.
41793 :
41794 : You may change algorithm properties by calling:
41795 : * ClusterizerSetKMeansLimits() to change number of restarts or iterations
41796 : * ClusterizerSetKMeansInit() to change initialization algorithm
41797 :
41798 : By default, one restart and unlimited number of iterations are used.
41799 : Initialization algorithm is chosen automatically.
41800 :
41801 : ! COMMERCIAL EDITION OF ALGLIB:
41802 : !
41803 : ! The Commercial Edition of ALGLIB includes the following important
41804 : ! improvements to this function:
41805 : ! * high-performance native backend with the same C# interface (C# version)
41806 : ! * multithreading support (C++ and C# versions)
41807 : ! * hardware vendor (Intel) implementations of linear algebra primitives
41808 : ! (C++ and C# versions, x86/x64 platform)
41809 : !
41810 : ! We recommend that you read the 'Working with commercial version' section
41811 : ! of the ALGLIB Reference Manual in order to find out how to use the
41812 : ! performance-related features provided by the commercial edition of ALGLIB.
41813 :
41814 : NOTE: the k-means clustering algorithm has two phases: selection of initial
41815 : centers and the clustering itself. ALGLIB parallelizes both phases.
41816 : The parallel version is optimized for the following scenario: a medium-
41817 : or high-dimensional problem (8 or more dimensions) with a large number
41818 : of points and clusters. However, some speed-up can be obtained even
41819 : when the assumptions above are violated.
41820 :
41821 : INPUT PARAMETERS:
41822 : S - clusterizer state, initialized by ClusterizerCreate()
41823 : K - number of clusters, K>=0.
41824 : K can be zero only when the algorithm is called for an
41825 : empty dataset; in this case the completion code is set
41826 : to success (+1).
41827 : If K=0 and the dataset size is non-zero, we cannot
41828 : meaningfully assign points to any center (there are no
41829 : centers because K=0) and return -3 as the completion
41830 : code (failure).
41831 :
41832 : OUTPUT PARAMETERS:
41833 : Rep - clustering results; see description of KMeansReport
41834 : structure for more information.
41835 :
41836 : NOTE 1: k-means clustering can be performed only for datasets with a
41837 : Euclidean distance function. The algorithm will return a negative
41838 : completion code in Rep.TerminationType if the dataset was added
41839 : to the clusterizer with a DistType other than Euclidean (or was
41840 : specified by a distance matrix instead of explicitly given points).
41841 :
41842 : NOTE 2: by default, k-means uses a non-deterministic seed to initialize the
41843 : RNG which is used to select initial centers. As a result, each run
41844 : of the algorithm may return different values. If you need
41845 : deterministic behavior, use the ClusterizerSetSeed() function.
41846 :
41847 : -- ALGLIB --
41848 : Copyright 10.07.2012 by Bochkanov Sergey
41849 : *************************************************************************/
41850 0 : void clusterizerrunkmeans(clusterizerstate* s,
41851 : ae_int_t k,
41852 : kmeansreport* rep,
41853 : ae_state *_state)
41854 : {
41855 : ae_frame _frame_block;
41856 : ae_matrix dummy;
41857 :
41858 0 : ae_frame_make(_state, &_frame_block);
41859 0 : memset(&dummy, 0, sizeof(dummy));
41860 0 : _kmeansreport_clear(rep);
41861 0 : ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
41862 :
41863 0 : ae_assert(k>=0, "ClusterizerRunKMeans: K<0", _state);
41864 :
41865 : /*
41866 : * Incorrect distance type
41867 : */
41868 0 : if( s->disttype!=2 )
41869 : {
41870 0 : rep->npoints = s->npoints;
41871 0 : rep->terminationtype = -5;
41872 0 : rep->k = k;
41873 0 : rep->iterationscount = 0;
41874 0 : rep->energy = 0.0;
41875 0 : ae_frame_leave(_state);
41876 0 : return;
41877 : }
41878 :
41879 : /*
41880 : * K>NPoints or (K=0 and NPoints>0)
41881 : */
41882 0 : if( k>s->npoints||(k==0&&s->npoints>0) )
41883 : {
41884 0 : rep->npoints = s->npoints;
41885 0 : rep->terminationtype = -3;
41886 0 : rep->k = k;
41887 0 : rep->iterationscount = 0;
41888 0 : rep->energy = 0.0;
41889 0 : ae_frame_leave(_state);
41890 0 : return;
41891 : }
41892 :
41893 : /*
41894 : * No points
41895 : */
41896 0 : if( s->npoints==0 )
41897 : {
41898 0 : rep->npoints = 0;
41899 0 : rep->terminationtype = 1;
41900 0 : rep->k = k;
41901 0 : rep->iterationscount = 0;
41902 0 : rep->energy = 0.0;
41903 0 : ae_frame_leave(_state);
41904 0 : return;
41905 : }
41906 :
41907 : /*
41908 : * Normal case:
41909 : * 1<=K<=NPoints, Euclidean distance
41910 : */
41911 0 : rep->npoints = s->npoints;
41912 0 : rep->nfeatures = s->nfeatures;
41913 0 : rep->k = k;
41916 0 : kmeansgenerateinternal(&s->xy, s->npoints, s->nfeatures, k, s->kmeansinitalgo, s->seed, s->kmeansmaxits, s->kmeansrestarts, s->kmeansdbgnoits, &rep->terminationtype, &rep->iterationscount, &dummy, ae_false, &rep->c, ae_true, &rep->cidx, &rep->energy, &s->kmeanstmp, _state);
41917 0 : ae_frame_leave(_state);
41918 : }
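/*************************************************************************
Usage sketch (editorial addition): running k-means on a clusterizer S
that was configured with explicitly given points and Euclidean DistType=2
(see NOTE 1 above). Assumes the same setup calls as in the AHC sketch
earlier in this file.

    kmeansreport krep;
    _kmeansreport_init(&krep, &_state, ae_true);
    clusterizerrunkmeans(&s, 3, &krep, &_state);   // request 3 clusters
    if( krep.terminationtype>0 )
    {
        // krep.c     : array[3,NFeatures], one center per row
        // krep.cidx  : array[NPoints], cluster index of each point
        // krep.energy: sum of squared point-to-center distances
    }
*************************************************************************/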
41919 :
41920 :
41921 : /*************************************************************************
41922 : This function returns distance matrix for dataset
41923 :
41924 : ! COMMERCIAL EDITION OF ALGLIB:
41925 : !
41926 : ! Commercial Edition of ALGLIB includes following important improvements
41927 : ! of this function:
41928 : ! * high-performance native backend with same C# interface (C# version)
41929 : ! * multithreading support (C++ and C# versions)
41930 : ! * hardware vendor (Intel) implementations of linear algebra primitives
41931 : ! (C++ and C# versions, x86/x64 platform)
41932 : !
41933 : ! We recommend you to read 'Working with commercial version' section of
41934 : ! ALGLIB Reference Manual in order to find out how to use performance-
41935 : ! related features provided by commercial edition of ALGLIB.
41936 :
41937 : INPUT PARAMETERS:
41938 : XY - array[NPoints,NFeatures], dataset
41939 : NPoints - number of points, >=0
41940 : NFeatures- number of features, >=1
41941 : DistType- distance function:
41942 : * 0 Chebyshev distance (L-inf norm)
41943 : * 1 city block distance (L1 norm)
41944 : * 2 Euclidean distance (L2 norm, non-squared)
41945 : * 10 Pearson correlation:
41946 : dist(a,b) = 1-corr(a,b)
41947 : * 11 Absolute Pearson correlation:
41948 : dist(a,b) = 1-|corr(a,b)|
41949 : * 12 Uncentered Pearson correlation (cosine of the angle):
41950 : dist(a,b) = 1-a'*b/(|a|*|b|)
41951 : * 13 Absolute uncentered Pearson correlation:
41952 : dist(a,b) = 1-|a'*b|/(|a|*|b|)
41953 : * 20 Spearman rank correlation:
41954 : dist(a,b) = 1-rankcorr(a,b)
41955 : * 21 Absolute Spearman rank correlation
41956 : dist(a,b) = 1-|rankcorr(a,b)|
41957 :
41958 : OUTPUT PARAMETERS:
41959 : D - array[NPoints,NPoints], distance matrix
41960 : (full matrix is returned, with lower and upper triangles)
41961 :
41962 : NOTE: different distance functions have different performance penalties:
41963 : * Euclidean and Pearson correlation distances are the fastest ones
41964 : * the Spearman correlation distance function is a bit slower
41965 : * city block and Chebyshev distances are an order of magnitude slower
41966 :
41967 : The reason behind the difference in performance is that correlation-based
41968 : distance functions are computed using optimized linear algebra kernels,
41969 : while Chebyshev and city block distance functions are computed using
41970 : simple nested loops with two branches at each iteration.
41971 :
41972 : -- ALGLIB --
41973 : Copyright 10.07.2012 by Bochkanov Sergey
41974 : *************************************************************************/
41975 0 : void clusterizergetdistances(/* Real */ ae_matrix* xy,
41976 : ae_int_t npoints,
41977 : ae_int_t nfeatures,
41978 : ae_int_t disttype,
41979 : /* Real */ ae_matrix* d,
41980 : ae_state *_state)
41981 : {
41982 : ae_frame _frame_block;
41983 : apbuffers buf;
41984 :
41985 0 : ae_frame_make(_state, &_frame_block);
41986 0 : memset(&buf, 0, sizeof(buf));
41987 0 : ae_matrix_clear(d);
41988 0 : _apbuffers_init(&buf, _state, ae_true);
41989 :
41990 0 : ae_assert(nfeatures>=1, "ClusterizerGetDistances: NFeatures<1", _state);
41991 0 : ae_assert(npoints>=0, "ClusterizerGetDistances: NPoints<0", _state);
41992 0 : ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerGetDistances: incorrect DistType", _state);
41993 0 : ae_assert(xy->rows>=npoints, "ClusterizerGetDistances: Rows(XY)<NPoints", _state);
41994 0 : ae_assert(xy->cols>=nfeatures, "ClusterizerGetDistances: Cols(XY)<NFeatures", _state);
41995 0 : ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerGetDistances: XY contains NAN/INF", _state);
41996 0 : clusterizergetdistancesbuf(&buf, xy, npoints, nfeatures, disttype, d, _state);
41997 0 : ae_frame_leave(_state);
41998 0 : }
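/*************************************************************************
Worked sketch (editorial addition): computing the full distance matrix
for a tiny 3x2 dataset with DistType=1 (city block). For the points
(0,0), (1,0), (1,2) the expected result is

    D = [ 0  1  3 ]
        [ 1  0  2 ]
        [ 3  2  0 ]

    ae_matrix xy, d;
    ae_matrix_init(&xy, 3, 2, DT_REAL, &_state, ae_true);
    ae_matrix_init(&d, 0, 0, DT_REAL, &_state, ae_true);
    xy.ptr.pp_double[0][0] = 0.0; xy.ptr.pp_double[0][1] = 0.0;
    xy.ptr.pp_double[1][0] = 1.0; xy.ptr.pp_double[1][1] = 0.0;
    xy.ptr.pp_double[2][0] = 1.0; xy.ptr.pp_double[2][1] = 2.0;
    clusterizergetdistances(&xy, 3, 2, 1, &d, &_state);
    // d.ptr.pp_double[0][2]==3.0, i.e. |0-1|+|0-2|
*************************************************************************/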
41999 :
42000 :
42001 : /*************************************************************************
42002 : Buffered version of ClusterizerGetDistances() which reuses previously
42003 : allocated space.
42004 :
42005 : -- ALGLIB --
42006 : Copyright 29.05.2015 by Bochkanov Sergey
42007 : *************************************************************************/
42008 0 : void clusterizergetdistancesbuf(apbuffers* buf,
42009 : /* Real */ ae_matrix* xy,
42010 : ae_int_t npoints,
42011 : ae_int_t nfeatures,
42012 : ae_int_t disttype,
42013 : /* Real */ ae_matrix* d,
42014 : ae_state *_state)
42015 : {
42016 : ae_int_t i;
42017 : ae_int_t j;
42018 : double v;
42019 : double vv;
42020 : double vr;
42021 :
42022 :
42023 0 : ae_assert(nfeatures>=1, "ClusterizerGetDistancesBuf: NFeatures<1", _state);
42024 0 : ae_assert(npoints>=0, "ClusterizerGetDistancesBuf: NPoints<0", _state);
42025 0 : ae_assert((((((((disttype==0||disttype==1)||disttype==2)||disttype==10)||disttype==11)||disttype==12)||disttype==13)||disttype==20)||disttype==21, "ClusterizerGetDistancesBuf: incorrect DistType", _state);
42026 0 : ae_assert(xy->rows>=npoints, "ClusterizerGetDistancesBuf: Rows(XY)<NPoints", _state);
42027 0 : ae_assert(xy->cols>=nfeatures, "ClusterizerGetDistancesBuf: Cols(XY)<NFeatures", _state);
42028 0 : ae_assert(apservisfinitematrix(xy, npoints, nfeatures, _state), "ClusterizerGetDistancesBuf: XY contains NAN/INF", _state);
42029 :
42030 : /*
42031 : * Quick exit
42032 : */
42033 0 : if( npoints==0 )
42034 : {
42035 0 : return;
42036 : }
42037 0 : if( npoints==1 )
42038 : {
42039 0 : rmatrixsetlengthatleast(d, 1, 1, _state);
42040 0 : d->ptr.pp_double[0][0] = (double)(0);
42041 0 : return;
42042 : }
42043 :
42044 : /*
42045 : * Build distance matrix D.
42046 : */
42047 0 : if( disttype==0||disttype==1 )
42048 : {
42049 :
42050 : /*
42051 : * Chebyshev or city-block distances:
42052 : * * recursively calculate upper triangle (with main diagonal)
42053 : * * copy it to the bottom part of the matrix
42054 : */
42055 0 : rmatrixsetlengthatleast(d, npoints, npoints, _state);
42056 0 : clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, 0, npoints, 0, npoints, _state);
42057 0 : rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
42058 0 : return;
42059 : }
42060 0 : if( disttype==2 )
42061 : {
42062 :
42063 : /*
42064 : * Euclidean distance
42065 : *
42066 : * NOTE: parallelization is done within RMatrixSYRK
42067 : */
42068 0 : rmatrixsetlengthatleast(d, npoints, npoints, _state);
42069 0 : rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
42070 0 : rvectorsetlengthatleast(&buf->ra1, nfeatures, _state);
42071 0 : rvectorsetlengthatleast(&buf->ra0, npoints, _state);
42072 0 : for(j=0; j<=nfeatures-1; j++)
42073 : {
42074 0 : buf->ra1.ptr.p_double[j] = 0.0;
42075 : }
42076 0 : v = (double)1/(double)npoints;
42077 0 : for(i=0; i<=npoints-1; i++)
42078 : {
42079 0 : ae_v_addd(&buf->ra1.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1), v);
42080 : }
42081 0 : for(i=0; i<=npoints-1; i++)
42082 : {
42083 0 : ae_v_move(&buf->rm0.ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nfeatures-1));
42084 0 : ae_v_sub(&buf->rm0.ptr.pp_double[i][0], 1, &buf->ra1.ptr.p_double[0], 1, ae_v_len(0,nfeatures-1));
42085 : }
42086 0 : rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
42087 0 : for(i=0; i<=npoints-1; i++)
42088 : {
42089 0 : buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
42090 : }
42091 0 : for(i=0; i<=npoints-1; i++)
42092 : {
42093 0 : d->ptr.pp_double[i][i] = 0.0;
42094 0 : for(j=i+1; j<=npoints-1; j++)
42095 : {
42096 0 : v = ae_sqrt(ae_maxreal(buf->ra0.ptr.p_double[i]+buf->ra0.ptr.p_double[j]-2*d->ptr.pp_double[i][j], 0.0, _state), _state);
42097 0 : d->ptr.pp_double[i][j] = v;
42098 : }
42099 : }
42100 0 : rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
42101 0 : return;
42102 : }
42103 0 : if( disttype==10||disttype==11 )
42104 : {
42105 :
42106 : /*
42107 : * Absolute/nonabsolute Pearson correlation distance
42108 : *
42109 : * NOTE: parallelization is done within PearsonCorrM, which calls RMatrixSYRK internally
42110 : */
42111 0 : rmatrixsetlengthatleast(d, npoints, npoints, _state);
42112 0 : rvectorsetlengthatleast(&buf->ra0, npoints, _state);
42113 0 : rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
42114 0 : for(i=0; i<=npoints-1; i++)
42115 : {
42116 0 : v = 0.0;
42117 0 : for(j=0; j<=nfeatures-1; j++)
42118 : {
42119 0 : v = v+xy->ptr.pp_double[i][j];
42120 : }
42121 0 : v = v/nfeatures;
42122 0 : for(j=0; j<=nfeatures-1; j++)
42123 : {
42124 0 : buf->rm0.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j]-v;
42125 : }
42126 : }
42127 0 : rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
42128 0 : for(i=0; i<=npoints-1; i++)
42129 : {
42130 0 : buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
42131 : }
42132 0 : for(i=0; i<=npoints-1; i++)
42133 : {
42134 0 : d->ptr.pp_double[i][i] = 0.0;
42135 0 : for(j=i+1; j<=npoints-1; j++)
42136 : {
42137 0 : v = d->ptr.pp_double[i][j]/ae_sqrt(buf->ra0.ptr.p_double[i]*buf->ra0.ptr.p_double[j], _state);
42138 0 : if( disttype==10 )
42139 : {
42140 0 : v = 1-v;
42141 : }
42142 : else
42143 : {
42144 0 : v = 1-ae_fabs(v, _state);
42145 : }
42146 0 : v = ae_maxreal(v, 0.0, _state);
42147 0 : d->ptr.pp_double[i][j] = v;
42148 : }
42149 : }
42150 0 : rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
42151 0 : return;
42152 : }
42153 0 : if( disttype==12||disttype==13 )
42154 : {
42155 :
42156 : /*
42157 : * Absolute/nonabsolute uncentered Pearson correlation distance
42158 : *
42159 : * NOTE: parallelization is done within RMatrixSYRK
42160 : */
42161 0 : rmatrixsetlengthatleast(d, npoints, npoints, _state);
42162 0 : rvectorsetlengthatleast(&buf->ra0, npoints, _state);
42163 0 : rmatrixsyrk(npoints, nfeatures, 1.0, xy, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
42164 0 : for(i=0; i<=npoints-1; i++)
42165 : {
42166 0 : buf->ra0.ptr.p_double[i] = d->ptr.pp_double[i][i];
42167 : }
42168 0 : for(i=0; i<=npoints-1; i++)
42169 : {
42170 0 : d->ptr.pp_double[i][i] = 0.0;
42171 0 : for(j=i+1; j<=npoints-1; j++)
42172 : {
42173 0 : v = d->ptr.pp_double[i][j]/ae_sqrt(buf->ra0.ptr.p_double[i]*buf->ra0.ptr.p_double[j], _state);
42174 0 : if( disttype==13 )
42175 : {
42176 0 : v = ae_fabs(v, _state);
42177 : }
42178 0 : v = ae_minreal(v, 1.0, _state);
42179 0 : d->ptr.pp_double[i][j] = 1-v;
42180 : }
42181 : }
42182 0 : rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
42183 0 : return;
42184 : }
42185 0 : if( disttype==20||disttype==21 )
42186 : {
42187 :
42188 : /*
42189 : * Spearman rank correlation
42190 : *
42191 : * NOTE: parallelization of correlation matrix is done within
42192 : * PearsonCorrM, which calls RMatrixSYRK internally
42193 : */
42194 0 : rmatrixsetlengthatleast(d, npoints, npoints, _state);
42195 0 : rvectorsetlengthatleast(&buf->ra0, npoints, _state);
42196 0 : rmatrixsetlengthatleast(&buf->rm0, npoints, nfeatures, _state);
42197 0 : rmatrixcopy(npoints, nfeatures, xy, 0, 0, &buf->rm0, 0, 0, _state);
42198 0 : rankdatacentered(&buf->rm0, npoints, nfeatures, _state);
42199 0 : rmatrixsyrk(npoints, nfeatures, 1.0, &buf->rm0, 0, 0, 0, 0.0, d, 0, 0, ae_true, _state);
42200 0 : for(i=0; i<=npoints-1; i++)
42201 : {
42202 0 : if( ae_fp_greater(d->ptr.pp_double[i][i],(double)(0)) )
42203 : {
42204 0 : buf->ra0.ptr.p_double[i] = 1/ae_sqrt(d->ptr.pp_double[i][i], _state);
42205 : }
42206 : else
42207 : {
42208 0 : buf->ra0.ptr.p_double[i] = 0.0;
42209 : }
42210 : }
42211 0 : for(i=0; i<=npoints-1; i++)
42212 : {
42213 0 : v = buf->ra0.ptr.p_double[i];
42214 0 : d->ptr.pp_double[i][i] = 0.0;
42215 0 : for(j=i+1; j<=npoints-1; j++)
42216 : {
42217 0 : vv = d->ptr.pp_double[i][j]*v*buf->ra0.ptr.p_double[j];
42218 0 : if( disttype==20 )
42219 : {
42220 0 : vr = 1-vv;
42221 : }
42222 : else
42223 : {
42224 0 : vr = 1-ae_fabs(vv, _state);
42225 : }
42226 0 : if( ae_fp_less(vr,(double)(0)) )
42227 : {
42228 0 : vr = 0.0;
42229 : }
42230 0 : d->ptr.pp_double[i][j] = vr;
42231 : }
42232 : }
42233 0 : rmatrixenforcesymmetricity(d, npoints, ae_true, _state);
42234 0 : return;
42235 : }
42236 0 : ae_assert(ae_false, "Assertion failed", _state);
42237 : }
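/*************************************************************************
Implementation note (editorial addition): the Euclidean branch above uses
the Gram-matrix identity. With mean-centered rows A stored in Buf.RM0,
RMatrixSYRK computes G = A*A', and

    dist(i,j)^2 = G[i][i] + G[j][j] - 2*G[i][j]

which is exactly the expression under ae_sqrt() in the disttype==2 branch;
the max(.,0) guard absorbs small negative values caused by rounding. The
Pearson branches reuse the same Gram matrix, since for centered rows
corr(i,j) = G[i][j]/sqrt(G[i][i]*G[j][j]).
*************************************************************************/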
42238 :
42239 :
42240 : /*************************************************************************
42241 : This function takes as input a clusterization report Rep and a desired
42242 : cluster count K, and builds the top K clusters from the hierarchical
42243 : clusterization tree, returning the assignment of points to clusters.
42244 :
42245 : INPUT PARAMETERS:
42246 : Rep - report from ClusterizerRunAHC() performed on XY
42247 : K - desired number of clusters, 1<=K<=NPoints.
42248 : K can be zero only when NPoints=0.
42249 :
42250 : OUTPUT PARAMETERS:
42251 : CIdx - array[NPoints], I-th element contains cluster index (from
42252 : 0 to K-1) for I-th point of the dataset.
42253 : CZ - array[K]. This array allows you to convert cluster indexes
42254 : returned by this function to the indexes used by Rep.Z. The
42255 : J-th cluster returned by this function corresponds to the
42256 : CZ[J]-th cluster stored in Rep.Z/PZ/PM.
42257 : It is guaranteed that CZ[I]<CZ[I+1].
42258 :
42259 : NOTE: the K clusters built by this subroutine are assumed to have no
42260 : hierarchy. Although they were obtained by manipulation of the top K
42261 : nodes of the dendrogram (i.e. the hierarchical decomposition of the
42262 : dataset), this function does not return information about the
42263 : hierarchy. Each of the clusters stands on its own.
42264 :
42265 : NOTE: cluster indexes returned by this function do not correspond to the
42266 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
42267 : representation of the dataset (dendrogram), or you work with the "flat"
42268 : representation returned by this function. Each representation has its
42269 : own cluster indexing system (the former uses [0..2*NPoints-2], while
42270 : the latter uses [0..K-1]). It is possible to convert from one system
42271 : to another by means of the CZ array returned by this function, which
42272 : allows you to convert indexes stored in CIdx to the numeration system
42273 : used by Rep.Z.
42274 :
42275 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
42276 : it will perform many times faster than for K=100. Its worst-case
42277 : performance is O(N*K), although in the average case it performs
42278 : better (up to O(N*log(K))).
42279 :
42280 : -- ALGLIB --
42281 : Copyright 10.07.2012 by Bochkanov Sergey
42282 : *************************************************************************/
42283 0 : void clusterizergetkclusters(ahcreport* rep,
42284 : ae_int_t k,
42285 : /* Integer */ ae_vector* cidx,
42286 : /* Integer */ ae_vector* cz,
42287 : ae_state *_state)
42288 : {
42289 : ae_frame _frame_block;
42290 : ae_int_t i;
42291 : ae_int_t mergeidx;
42292 : ae_int_t i0;
42293 : ae_int_t i1;
42294 : ae_int_t t;
42295 : ae_vector presentclusters;
42296 : ae_vector clusterindexes;
42297 : ae_vector clustersizes;
42298 : ae_vector tmpidx;
42299 : ae_int_t npoints;
42300 :
42301 0 : ae_frame_make(_state, &_frame_block);
42302 0 : memset(&presentclusters, 0, sizeof(presentclusters));
42303 0 : memset(&clusterindexes, 0, sizeof(clusterindexes));
42304 0 : memset(&clustersizes, 0, sizeof(clustersizes));
42305 0 : memset(&tmpidx, 0, sizeof(tmpidx));
42306 0 : ae_vector_clear(cidx);
42307 0 : ae_vector_clear(cz);
42308 0 : ae_vector_init(&presentclusters, 0, DT_BOOL, _state, ae_true);
42309 0 : ae_vector_init(&clusterindexes, 0, DT_INT, _state, ae_true);
42310 0 : ae_vector_init(&clustersizes, 0, DT_INT, _state, ae_true);
42311 0 : ae_vector_init(&tmpidx, 0, DT_INT, _state, ae_true);
42312 :
42313 0 : npoints = rep->npoints;
42314 0 : ae_assert(npoints>=0, "ClusterizerGetKClusters: internal error in Rep integrity", _state);
42315 0 : ae_assert(k>=0, "ClusterizerGetKClusters: K<0", _state);
42316 0 : ae_assert(k<=npoints, "ClusterizerGetKClusters: K>NPoints", _state);
42317 0 : ae_assert(k>0||npoints==0, "ClusterizerGetKClusters: K<=0", _state);
42318 0 : ae_assert(npoints==rep->npoints, "ClusterizerGetKClusters: NPoints<>Rep.NPoints", _state);
42319 :
42320 : /*
42321 : * Quick exit
42322 : */
42323 0 : if( npoints==0 )
42324 : {
42325 0 : ae_frame_leave(_state);
42326 0 : return;
42327 : }
42328 0 : if( npoints==1 )
42329 : {
42330 0 : ae_vector_set_length(cz, 1, _state);
42331 0 : ae_vector_set_length(cidx, 1, _state);
42332 0 : cz->ptr.p_int[0] = 0;
42333 0 : cidx->ptr.p_int[0] = 0;
42334 0 : ae_frame_leave(_state);
42335 0 : return;
42336 : }
42337 :
42338 : /*
42339 : * Replay merges, from top to bottom,
42340 : * keep track of clusters being present at the moment
42341 : */
42342 0 : ae_vector_set_length(&presentclusters, 2*npoints-1, _state);
42343 0 : ae_vector_set_length(&tmpidx, npoints, _state);
42344 0 : for(i=0; i<=2*npoints-3; i++)
42345 : {
42346 0 : presentclusters.ptr.p_bool[i] = ae_false;
42347 : }
42348 0 : presentclusters.ptr.p_bool[2*npoints-2] = ae_true;
42349 0 : for(i=0; i<=npoints-1; i++)
42350 : {
42351 0 : tmpidx.ptr.p_int[i] = 2*npoints-2;
42352 : }
42353 0 : for(mergeidx=npoints-2; mergeidx>=npoints-k; mergeidx--)
42354 : {
42355 :
42356 : /*
42357 : * Update information about clusters being present at the moment
42358 : */
42359 0 : presentclusters.ptr.p_bool[npoints+mergeidx] = ae_false;
42360 0 : presentclusters.ptr.p_bool[rep->z.ptr.pp_int[mergeidx][0]] = ae_true;
42361 0 : presentclusters.ptr.p_bool[rep->z.ptr.pp_int[mergeidx][1]] = ae_true;
42362 :
42363 : /*
42364 : * Update TmpIdx according to the current state of the dataset
42365 : *
42366 : * NOTE: TmpIdx contains cluster indexes from [0..2*NPoints-2];
42367 : * we will convert them to [0..K-1] later.
42368 : */
42369 0 : i0 = rep->pm.ptr.pp_int[mergeidx][0];
42370 0 : i1 = rep->pm.ptr.pp_int[mergeidx][1];
42371 0 : t = rep->z.ptr.pp_int[mergeidx][0];
42372 0 : for(i=i0; i<=i1; i++)
42373 : {
42374 0 : tmpidx.ptr.p_int[i] = t;
42375 : }
42376 0 : i0 = rep->pm.ptr.pp_int[mergeidx][2];
42377 0 : i1 = rep->pm.ptr.pp_int[mergeidx][3];
42378 0 : t = rep->z.ptr.pp_int[mergeidx][1];
42379 0 : for(i=i0; i<=i1; i++)
42380 : {
42381 0 : tmpidx.ptr.p_int[i] = t;
42382 : }
42383 : }
42384 :
42385 : /*
42386 : * Fill CZ - array which allows us to convert cluster indexes
42387 : * from one system to another.
42388 : */
42389 0 : ae_vector_set_length(cz, k, _state);
42390 0 : ae_vector_set_length(&clusterindexes, 2*npoints-1, _state);
42391 0 : t = 0;
42392 0 : for(i=0; i<=2*npoints-2; i++)
42393 : {
42394 0 : if( presentclusters.ptr.p_bool[i] )
42395 : {
42396 0 : cz->ptr.p_int[t] = i;
42397 0 : clusterindexes.ptr.p_int[i] = t;
42398 0 : t = t+1;
42399 : }
42400 : }
42401 0 : ae_assert(t==k, "ClusterizerGetKClusters: internal error", _state);
42402 :
42403 : /*
42404 : * Convert indexes stored in CIdx
42405 : */
42406 0 : ae_vector_set_length(cidx, npoints, _state);
42407 0 : for(i=0; i<=npoints-1; i++)
42408 : {
42409 0 : cidx->ptr.p_int[i] = clusterindexes.ptr.p_int[tmpidx.ptr.p_int[rep->p.ptr.p_int[i]]];
42410 : }
42411 0 : ae_frame_leave(_state);
42412 : }
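/*************************************************************************
Usage sketch (editorial addition): extracting a flat 4-cluster partition
from an AHC report and mapping flat indexes back to the dendrogram
numeration via CZ.

    ae_vector cidx, cz;
    ae_vector_init(&cidx, 0, DT_INT, &_state, ae_true);
    ae_vector_init(&cz, 0, DT_INT, &_state, ae_true);
    clusterizergetkclusters(&rep, 4, &cidx, &cz, &_state);
    // point I belongs to flat cluster cidx.ptr.p_int[I], in [0..3];
    // that cluster is node cz.ptr.p_int[cidx.ptr.p_int[I]] of Rep.Z/PZ/PM
*************************************************************************/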
42413 :
42414 :
42415 : /*************************************************************************
42416 : This function accepts an AHC report Rep and a desired minimum intercluster
42417 : distance, and returns the top clusters from the hierarchical clusterization
42418 : tree which are separated by distance R or HIGHER.
42419 :
42420 : It returns the assignment of points to clusters (an array of cluster indexes).
42421 :
42422 : There is one more function with a similar name - ClusterizerSeparatedByCorr -
42423 : which returns clusters with intercluster correlation equal to R or LOWER
42424 : (note: higher for distance, lower for correlation).
42425 :
42426 : INPUT PARAMETERS:
42427 : Rep - report from ClusterizerRunAHC() performed on XY
42428 : R - desired minimum intercluster distance, R>=0
42429 :
42430 : OUTPUT PARAMETERS:
42431 : K - number of clusters, 1<=K<=NPoints
42432 : CIdx - array[NPoints], I-th element contains cluster index (from
42433 : 0 to K-1) for I-th point of the dataset.
42434 : CZ - array[K]. This array allows you to convert cluster indexes
42435 : returned by this function to the indexes used by Rep.Z. The
42436 : J-th cluster returned by this function corresponds to the
42437 : CZ[J]-th cluster stored in Rep.Z/PZ/PM.
42438 : It is guaranteed that CZ[I]<CZ[I+1].
42439 :
42440 : NOTE: the K clusters built by this subroutine are assumed to have no
42441 : hierarchy. Although they were obtained by manipulation of the top K
42442 : nodes of the dendrogram (i.e. the hierarchical decomposition of the
42443 : dataset), this function does not return information about the
42444 : hierarchy. Each of the clusters stands on its own.
42445 :
42446 : NOTE: cluster indexes returned by this function do not correspond to the
42447 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
42448 : representation of the dataset (dendrogram), or you work with the "flat"
42449 : representation returned by this function. Each representation has its
42450 : own cluster indexing system (the former uses [0..2*NPoints-2], while
42451 : the latter uses [0..K-1]). It is possible to convert from one system
42452 : to another by means of the CZ array returned by this function, which
42453 : allows you to convert indexes stored in CIdx to the numeration system
42454 : used by Rep.Z.
42455 :
42456 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
42457 : it will perform many times faster than for K=100. Its worst-case
42458 : performance is O(N*K), although in the average case it performs
42459 : better (up to O(N*log(K))).
42460 :
42461 : -- ALGLIB --
42462 : Copyright 10.07.2012 by Bochkanov Sergey
42463 : *************************************************************************/
42464 0 : void clusterizerseparatedbydist(ahcreport* rep,
42465 : double r,
42466 : ae_int_t* k,
42467 : /* Integer */ ae_vector* cidx,
42468 : /* Integer */ ae_vector* cz,
42469 : ae_state *_state)
42470 : {
42471 :
42472 0 : *k = 0;
42473 0 : ae_vector_clear(cidx);
42474 0 : ae_vector_clear(cz);
42475 :
42476 0 : ae_assert(ae_isfinite(r, _state)&&ae_fp_greater_eq(r,(double)(0)), "ClusterizerSeparatedByDist: R is infinite or less than 0", _state);
42477 0 : *k = 1;
42478 0 : while(*k<rep->npoints&&ae_fp_greater_eq(rep->mergedist.ptr.p_double[rep->npoints-1-(*k)],r))
42479 : {
42480 0 : *k = *k+1;
42481 : }
42482 0 : clusterizergetkclusters(rep, *k, cidx, cz, _state);
42483 0 : }
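/*************************************************************************
Worked example (editorial addition): the loop above scans merge distances
from the last (largest-scale) merge downwards. With NPoints=4 and
Rep.MergeDist = [0.1, 0.5, 2.0], calling with R=1.0 starts at K=1, sees
MergeDist[2]=2.0>=1.0 and increments to K=2, then stops because
MergeDist[1]=0.5<1.0. The result is the 2 top clusters, which are
separated by distance 2.0>=R.
*************************************************************************/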
42484 :
42485 :
42486 : /*************************************************************************
42487 : This function accepts an AHC report Rep and a desired maximum intercluster
42488 : correlation, and returns the top clusters from the hierarchical clusterization
42489 : tree which are separated by correlation R or LOWER.
42490 :
42491 : It returns the assignment of points to clusters (an array of cluster indexes).
42492 :
42493 : There is one more function with a similar name - ClusterizerSeparatedByDist -
42494 : which returns clusters with intercluster distance equal to R or HIGHER
42495 : (note: higher for distance, lower for correlation).
42496 :
42497 : INPUT PARAMETERS:
42498 : Rep - report from ClusterizerRunAHC() performed on XY
42499 : R - desired maximum intercluster correlation, -1<=R<=+1
42500 :
42501 : OUTPUT PARAMETERS:
42502 : K - number of clusters, 1<=K<=NPoints
42503 : CIdx - array[NPoints], I-th element contains cluster index (from
42504 : 0 to K-1) for I-th point of the dataset.
42505 : CZ - array[K]. This array allows you to convert cluster indexes
42506 : returned by this function to the indexes used by Rep.Z. The
42507 : J-th cluster returned by this function corresponds to the
42508 : CZ[J]-th cluster stored in Rep.Z/PZ/PM.
42509 : It is guaranteed that CZ[I]<CZ[I+1].
42510 :
42511 : NOTE: the K clusters built by this subroutine are assumed to have no
42512 : hierarchy. Although they were obtained by manipulation of the top K
42513 : nodes of the dendrogram (i.e. the hierarchical decomposition of the
42514 : dataset), this function does not return information about the
42515 : hierarchy. Each of the clusters stands on its own.
42516 :
42517 : NOTE: cluster indexes returned by this function do not correspond to the
42518 : indexes returned in Rep.Z/PZ/PM. Either you work with the hierarchical
42519 : representation of the dataset (dendrogram), or you work with the "flat"
42520 : representation returned by this function. Each representation has its
42521 : own cluster indexing system (the former uses [0..2*NPoints-2], while
42522 : the latter uses [0..K-1]). It is possible to convert from one system
42523 : to another by means of the CZ array returned by this function, which
42524 : allows you to convert indexes stored in CIdx to the numeration system
42525 : used by Rep.Z.
42526 :
42527 : NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
42528 : it will perform many times faster than for K=100. Its worst-case
42529 : performance is O(N*K), although in the average case it performs
42530 : better (up to O(N*log(K))).
42531 :
42532 : -- ALGLIB --
42533 : Copyright 10.07.2012 by Bochkanov Sergey
42534 : *************************************************************************/
42535 0 : void clusterizerseparatedbycorr(ahcreport* rep,
42536 : double r,
42537 : ae_int_t* k,
42538 : /* Integer */ ae_vector* cidx,
42539 : /* Integer */ ae_vector* cz,
42540 : ae_state *_state)
42541 : {
42542 :
42543 0 : *k = 0;
42544 0 : ae_vector_clear(cidx);
42545 0 : ae_vector_clear(cz);
42546 :
42547 0 : ae_assert((ae_isfinite(r, _state)&&ae_fp_greater_eq(r,(double)(-1)))&&ae_fp_less_eq(r,(double)(1)), "ClusterizerSeparatedByCorr: R is infinite or outside of [-1,+1]", _state);
42548 0 : *k = 1;
42549 0 : while(*k<rep->npoints&&ae_fp_greater_eq(rep->mergedist.ptr.p_double[rep->npoints-1-(*k)],1-r))
42550 : {
42551 0 : *k = *k+1;
42552 : }
42553 0 : clusterizergetkclusters(rep, *k, cidx, cz, _state);
42554 0 : }
42555 :
42556 :
42557 : /*************************************************************************
42558 : Initialization of the reusable k-means buffer structure
42559 :
42560 : INPUT PARAMETERS:
42561 : Buf - special reusable structure which stores previously allocated
42562 : memory, intended to avoid memory fragmentation when solving
42563 : multiple subsequent problems. This function must be called on
42564 : Buf prior to its first use.
42565 :
42566 : OUTPUT PARAMETERS:
42567 : Buf - initialized structure
42568 :
42569 : -- ALGLIB --
42570 : Copyright 24.07.2015 by Bochkanov Sergey
42571 : *************************************************************************/
42572 0 : void kmeansinitbuf(kmeansbuffers* buf, ae_state *_state)
42573 : {
42574 : ae_frame _frame_block;
42575 : apbuffers updateseed;
42576 :
42577 0 : ae_frame_make(_state, &_frame_block);
42578 0 : memset(&updateseed, 0, sizeof(updateseed));
42579 0 : _apbuffers_init(&updateseed, _state, ae_true);
42580 :
42581 0 : ae_shared_pool_set_seed(&buf->updatepool, &updateseed, sizeof(updateseed), _apbuffers_init, _apbuffers_init_copy, _apbuffers_destroy, _state);
42582 0 : ae_frame_leave(_state);
42583 0 : }
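/*************************************************************************
Usage note (editorial addition): KMeansInitBuf() is called once per
KMeansBuffers instance; after that the same Buf may be passed to any
number of KMeansGenerateInternal() calls, which reuse its storage
instead of reallocating it on every pass.
*************************************************************************/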
42584 :
42585 :
42586 : /*************************************************************************
42587 : K-means++ clusterization
42588 :
42589 : INPUT PARAMETERS:
42590 : XY - dataset, array [0..NPoints-1,0..NVars-1].
42591 : NPoints - dataset size, NPoints>=K
42592 : NVars - number of variables, NVars>=1
42593 : K - desired number of clusters, K>=1
42594 : InitAlgo - initialization algorithm:
42595 : * 0 - automatic selection of best algorithm
42596 : * 1 - random selection of centers
42597 : * 2 - k-means++
42598 : * 3 - fast-greedy init
42599 : *-1 - first K rows of dataset are used
42600 : (special debug algorithm)
42601 : Seed - seed value for internal RNG:
42602 : * positive value is used to initialize RNG in order to
42603 : induce deterministic behavior of algorithm
42604 : * zero or negative value means that random seed is
42605 : generated
42606 : MaxIts - iterations limit or zero for no limit
42607 : Restarts - number of restarts, Restarts>=1
42608 : KMeansDbgNoIts- debug flag; if set, Lloyd's iteration is not performed
42609 : and only the initialization phase is executed.
42610 : Buf - special reusable structure which stores previously allocated
42611 : memory, intended to avoid memory fragmentation when solving
42612 : multiple subsequent problems:
42613 : * MUST BE INITIALIZED WITH A KMeansInitBuf() CALL BEFORE
42614 : THE FIRST PASS TO THIS FUNCTION!
42615 : * subsequent passes must be made without re-initialization
42616 :
42617 : OUTPUT PARAMETERS:
42618 : Info - return code:
42619 : * -3, if task is degenerate (number of distinct points is
42620 : less than K)
42621 : * -1, if incorrect NPoints/NVars/K/Restarts was passed
42622 : * 1, if subroutine finished successfully
42623 : IterationsCount- actual number of iterations performed by clusterizer
42624 : CCol - array[0..NVars-1,0..K-1], matrix whose columns store
42625 : the cluster centers
42626 : NeedCCol - True in case the caller requires the result in CCol
42627 : CRow - array[0..K-1,0..NVars-1], same as CCol, but centers are
42628 : stored in rows
42629 : NeedCRow - True in case the caller requires the result in CRow
42630 : XYC - array[NPoints], which contains cluster indexes
42631 : Energy - merit function of clusterization
42632 :
42633 : -- ALGLIB --
42634 : Copyright 21.03.2009 by Bochkanov Sergey
42635 : *************************************************************************/
42636 0 : void kmeansgenerateinternal(/* Real */ ae_matrix* xy,
42637 : ae_int_t npoints,
42638 : ae_int_t nvars,
42639 : ae_int_t k,
42640 : ae_int_t initalgo,
42641 : ae_int_t seed,
42642 : ae_int_t maxits,
42643 : ae_int_t restarts,
42644 : ae_bool kmeansdbgnoits,
42645 : ae_int_t* info,
42646 : ae_int_t* iterationscount,
42647 : /* Real */ ae_matrix* ccol,
42648 : ae_bool needccol,
42649 : /* Real */ ae_matrix* crow,
42650 : ae_bool needcrow,
42651 : /* Integer */ ae_vector* xyc,
42652 : double* energy,
42653 : kmeansbuffers* buf,
42654 : ae_state *_state)
42655 : {
42656 : ae_frame _frame_block;
42657 : ae_int_t i;
42658 : ae_int_t j;
42659 : ae_int_t i1;
42660 : double e;
42661 : double eprev;
42662 : double v;
42663 : double vv;
42664 : ae_bool waschanges;
42665 : ae_bool zerosizeclusters;
42666 : ae_int_t pass;
42667 : ae_int_t itcnt;
42668 : hqrndstate rs;
42669 :
42670 0 : ae_frame_make(_state, &_frame_block);
42671 0 : memset(&rs, 0, sizeof(rs));
42672 0 : *info = 0;
42673 0 : *iterationscount = 0;
42674 0 : ae_matrix_clear(ccol);
42675 0 : ae_matrix_clear(crow);
42676 0 : ae_vector_clear(xyc);
42677 0 : *energy = 0;
42678 0 : _hqrndstate_init(&rs, _state, ae_true);
42679 :
42680 :
42681 : /*
42682 : * Test parameters
42683 : */
42684 0 : if( ((npoints<k||nvars<1)||k<1)||restarts<1 )
42685 : {
42686 0 : *info = -1;
42687 0 : *iterationscount = 0;
42688 0 : ae_frame_leave(_state);
42689 0 : return;
42690 : }
42691 :
42692 : /*
42693 : * TODO: special case K=1
42694 : * TODO: special case K=NPoints
42695 : */
42696 0 : *info = 1;
42697 0 : *iterationscount = 0;
42698 :
42699 : /*
42700 : * Multiple passes of k-means++ algorithm
42701 : */
42702 0 : if( seed<=0 )
42703 : {
42704 0 : hqrndrandomize(&rs, _state);
42705 : }
42706 : else
42707 : {
42708 0 : hqrndseed(325355, seed, &rs, _state);
42709 : }
42710 0 : ae_vector_set_length(xyc, npoints, _state);
42711 0 : rmatrixsetlengthatleast(&buf->ct, k, nvars, _state);
42712 0 : rmatrixsetlengthatleast(&buf->ctbest, k, nvars, _state);
42713 0 : ivectorsetlengthatleast(&buf->xycprev, npoints, _state);
42714 0 : ivectorsetlengthatleast(&buf->xycbest, npoints, _state);
42715 0 : rvectorsetlengthatleast(&buf->d2, npoints, _state);
42716 0 : ivectorsetlengthatleast(&buf->csizes, k, _state);
42717 0 : *energy = ae_maxrealnumber;
42718 0 : for(pass=1; pass<=restarts; pass++)
42719 : {
42720 :
42721 : /*
42722 : * Select initial centers.
42723 : *
42724 : * Note that for performance reasons centers are stored in ROWS of CT, not
42725 : * in columns. We'll transpose CT in the end and store it in C.
42726 : *
42727 : * Also note that SelectInitialCenters() may return a degenerate set of centers
42728 : * (some of them have no corresponding points in the dataset, some are non-distinct).
42729 : * The algorithm below is robust enough to deal with such a set.
42730 : */
42731 0 : clustering_selectinitialcenters(xy, npoints, nvars, initalgo, &rs, k, &buf->ct, &buf->initbuf, &buf->updatepool, _state);
42732 :
42733 : /*
42734 : * Lloyd's iteration
42735 : */
42736 0 : if( !kmeansdbgnoits )
42737 : {
42738 :
42739 : /*
42740 : * Perform iteration as usual, in normal mode
42741 : */
42742 0 : for(i=0; i<=npoints-1; i++)
42743 : {
42744 0 : xyc->ptr.p_int[i] = -1;
42745 : }
42746 0 : eprev = ae_maxrealnumber;
42747 0 : e = ae_maxrealnumber;
42748 0 : itcnt = 0;
42749 0 : while(maxits==0||itcnt<maxits)
42750 : {
42751 :
42752 : /*
42753 : * Update iteration counter
42754 : */
42755 0 : itcnt = itcnt+1;
42756 0 : inc(iterationscount, _state);
42757 :
42758 : /*
42759 : * Call KMeansUpdateDistances(), fill XYC with center numbers,
42760 : * D2 with center distances.
42761 : */
42762 0 : for(i=0; i<=npoints-1; i++)
42763 : {
42764 0 : buf->xycprev.ptr.p_int[i] = xyc->ptr.p_int[i];
42765 : }
42766 0 : kmeansupdatedistances(xy, 0, npoints, nvars, &buf->ct, 0, k, xyc, &buf->d2, &buf->updatepool, _state);
42767 0 : waschanges = ae_false;
42768 0 : for(i=0; i<=npoints-1; i++)
42769 : {
42770 0 : waschanges = waschanges||xyc->ptr.p_int[i]!=buf->xycprev.ptr.p_int[i];
42771 : }
42772 :
42773 : /*
42774 : * Update centers
42775 : */
42776 0 : for(j=0; j<=k-1; j++)
42777 : {
42778 0 : buf->csizes.ptr.p_int[j] = 0;
42779 : }
42780 0 : for(i=0; i<=k-1; i++)
42781 : {
42782 0 : for(j=0; j<=nvars-1; j++)
42783 : {
42784 0 : buf->ct.ptr.pp_double[i][j] = (double)(0);
42785 : }
42786 : }
42787 0 : for(i=0; i<=npoints-1; i++)
42788 : {
42789 0 : buf->csizes.ptr.p_int[xyc->ptr.p_int[i]] = buf->csizes.ptr.p_int[xyc->ptr.p_int[i]]+1;
42790 0 : ae_v_add(&buf->ct.ptr.pp_double[xyc->ptr.p_int[i]][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
42791 : }
42792 0 : zerosizeclusters = ae_false;
42793 0 : for(j=0; j<=k-1; j++)
42794 : {
42795 0 : if( buf->csizes.ptr.p_int[j]!=0 )
42796 : {
42797 0 : v = (double)1/(double)buf->csizes.ptr.p_int[j];
42798 0 : ae_v_muld(&buf->ct.ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1), v);
42799 : }
42800 0 : zerosizeclusters = zerosizeclusters||buf->csizes.ptr.p_int[j]==0;
42801 : }
42802 0 : if( zerosizeclusters )
42803 : {
42804 :
42805 : /*
42806 : * Some clusters have zero size - rare, but possible.
42807 : * We'll choose new centers for such clusters using k-means++ rule
42808 : * and restart algorithm, decrementing iteration counter
42809 : * in order to allow one more iteration (this one was useless
42810 : * and should not be counted).
42811 : */
42812 0 : if( !clustering_fixcenters(xy, npoints, nvars, &buf->ct, k, &buf->initbuf, &buf->updatepool, _state) )
42813 : {
42814 0 : *info = -3;
42815 0 : ae_frame_leave(_state);
42816 0 : return;
42817 : }
42818 0 : itcnt = itcnt-1;
42819 0 : continue;
42820 : }
42821 :
42822 : /*
42823 : * Stop if one of two conditions is met:
42824 : * 1. nothing has changed during iteration
42825 : * 2. energy function did not decrease after recalculation on new centers
42826 : */
42827 0 : e = (double)(0);
42828 0 : for(i=0; i<=npoints-1; i++)
42829 : {
42830 0 : v = 0.0;
42831 0 : i1 = xyc->ptr.p_int[i];
42832 0 : for(j=0; j<=nvars-1; j++)
42833 : {
42834 0 : vv = xy->ptr.pp_double[i][j]-buf->ct.ptr.pp_double[i1][j];
42835 0 : v = v+vv*vv;
42836 : }
42837 0 : e = e+v;
42838 : }
42839 0 : if( !waschanges||ae_fp_greater_eq(e,eprev) )
42840 : {
42841 0 : break;
42842 : }
42843 :
42844 : /*
42845 : * Update EPrev
42846 : */
42847 0 : eprev = e;
42848 : }
42849 : }
42850 : else
42851 : {
42852 :
42853 : /*
42854 : * Debug mode: no Lloyd's iteration.
42855 : * We just calculate potential E.
42856 : */
42857 0 : kmeansupdatedistances(xy, 0, npoints, nvars, &buf->ct, 0, k, xyc, &buf->d2, &buf->updatepool, _state);
42858 0 : e = (double)(0);
42859 0 : for(i=0; i<=npoints-1; i++)
42860 : {
42861 0 : e = e+buf->d2.ptr.p_double[i];
42862 : }
42863 : }
42864 :
42865 : /*
42866 : * Compare E with best centers found so far
42867 : */
42868 0 : if( ae_fp_less(e,*energy) )
42869 : {
42870 :
42871 : /*
42872 : * store partition.
42873 : */
42874 0 : *energy = e;
42875 0 : copymatrix(&buf->ct, 0, k-1, 0, nvars-1, &buf->ctbest, 0, k-1, 0, nvars-1, _state);
42876 0 : for(i=0; i<=npoints-1; i++)
42877 : {
42878 0 : buf->xycbest.ptr.p_int[i] = xyc->ptr.p_int[i];
42879 : }
42880 : }
42881 : }
42882 :
42883 : /*
42884 : * Copy and transpose
42885 : */
42886 0 : if( needccol )
42887 : {
42888 0 : ae_matrix_set_length(ccol, nvars, k, _state);
42889 0 : copyandtranspose(&buf->ctbest, 0, k-1, 0, nvars-1, ccol, 0, nvars-1, 0, k-1, _state);
42890 : }
42891 0 : if( needcrow )
42892 : {
42893 0 : ae_matrix_set_length(crow, k, nvars, _state);
42894 0 : rmatrixcopy(k, nvars, &buf->ctbest, 0, 0, crow, 0, 0, _state);
42895 : }
42896 0 : for(i=0; i<=npoints-1; i++)
42897 : {
42898 0 : xyc->ptr.p_int[i] = buf->xycbest.ptr.p_int[i];
42899 : }
42900 0 : ae_frame_leave(_state);
42901 : }
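/*************************************************************************
Added commentary: the Energy value produced above is the standard k-means
potential computed inside the Lloyd loop,

    E = sum(i=0..NPoints-1) ||XY[i] - CT[XYC[i]]||^2,

and the restart loop keeps the centers/assignment pair with the smallest
E over all Restarts passes.
*************************************************************************/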
42902 :
42903 :
42904 : /*************************************************************************
42905 : This procedure recalculates distances from points to centers and assigns
42906 : each point to closest center.
42907 :
42908 : INPUT PARAMETERS:
42909 : XY - dataset, array [0..NPoints-1,0..NVars-1].
42910 : Idx0,Idx1 - define range of dataset [Idx0,Idx1) to process;
42911 : right boundary is not included.
42912 : NVars - number of variables, NVars>=1
42913 : CT - matrix of centers, centers are stored in rows
42914 : CIdx0,CIdx1 - define range of centers [CIdx0,CIdx1) to process;
42915 : right boundary is not included.
42916 : XYC - preallocated output buffer
42917 : XYDist2 - preallocated output buffer
42919 : BufferPool - shared pool seeded with an instance of APBuffers structure
42920 : (seed instance can be uninitialized). It is recommended
42921 : to use this pool only with KMeansUpdateDistances()
42922 : function.
42923 :
42924 : OUTPUT PARAMETERS:
42925 : XYC - new assignment of points to centers are stored
42926 : in [Idx0,Idx1)
42927 : XYDist2 - squared distances from points to their centers are
42928 : stored in [Idx0,Idx1)
42929 :
42930 : -- ALGLIB --
42931 : Copyright 21.01.2015 by Bochkanov Sergey
42932 : *************************************************************************/
42933 0 : void kmeansupdatedistances(/* Real */ ae_matrix* xy,
42934 : ae_int_t idx0,
42935 : ae_int_t idx1,
42936 : ae_int_t nvars,
42937 : /* Real */ ae_matrix* ct,
42938 : ae_int_t cidx0,
42939 : ae_int_t cidx1,
42940 : /* Integer */ ae_vector* xyc,
42941 : /* Real */ ae_vector* xydist2,
42942 : ae_shared_pool* bufferpool,
42943 : ae_state *_state)
42944 : {
42945 : ae_frame _frame_block;
42946 : ae_int_t i;
42947 : ae_int_t i0;
42948 : ae_int_t i1;
42949 : ae_int_t j;
42950 : ae_int_t cclosest;
42951 : double dclosest;
42952 : double vv;
42953 : apbuffers *buf;
42954 : ae_smart_ptr _buf;
42955 : double rcomplexity;
42956 : ae_int_t task0;
42957 : ae_int_t task1;
42958 : ae_int_t pblkcnt;
42959 : ae_int_t cblkcnt;
42960 : ae_int_t vblkcnt;
42961 : ae_int_t pblk;
42962 : ae_int_t cblk;
42963 : ae_int_t vblk;
42964 : ae_int_t p0;
42965 : ae_int_t p1;
42966 : ae_int_t c0;
42967 : ae_int_t c1;
42968 : ae_int_t v0;
42969 : ae_int_t v1;
42970 : double v00;
42971 : double v01;
42972 : double v10;
42973 : double v11;
42974 : double vp0;
42975 : double vp1;
42976 : double vc0;
42977 : double vc1;
42978 : ae_int_t pcnt;
42979 : ae_int_t pcntpadded;
42980 : ae_int_t ccnt;
42981 : ae_int_t ccntpadded;
42982 : ae_int_t offs0;
42983 : ae_int_t offs00;
42984 : ae_int_t offs01;
42985 : ae_int_t offs10;
42986 : ae_int_t offs11;
42987 : ae_int_t vcnt;
42988 : ae_int_t stride;
42989 :
42990 0 : ae_frame_make(_state, &_frame_block);
42991 0 : memset(&_buf, 0, sizeof(_buf));
42992 0 : ae_smart_ptr_init(&_buf, (void**)&buf, _state, ae_true);
42993 :
42994 :
42995 : /*
42996 : * Quick exit for special cases
42997 : */
42998 0 : if( idx1<=idx0 )
42999 : {
43000 0 : ae_frame_leave(_state);
43001 0 : return;
43002 : }
43003 0 : if( cidx1<=cidx0 )
43004 : {
43005 0 : ae_frame_leave(_state);
43006 0 : return;
43007 : }
43008 0 : if( nvars<=0 )
43009 : {
43010 0 : ae_frame_leave(_state);
43011 0 : return;
43012 : }
43013 :
43014 : /*
43015 : * Try to recursively divide/process dataset
43016 : *
43017 : * NOTE: real arithmetic is used to avoid integer overflow on large problem sizes
43018 : */
43019 0 : rcomplexity = 2*rmul3((double)(idx1-idx0), (double)(cidx1-cidx0), (double)(nvars), _state);
43020 0 : if( ae_fp_greater_eq(rcomplexity,smpactivationlevel(_state))&&idx1-idx0>=2*clustering_kmeansblocksize )
43021 : {
43022 0 : if( _trypexec_kmeansupdatedistances(xy,idx0,idx1,nvars,ct,cidx0,cidx1,xyc,xydist2,bufferpool, _state) )
43023 : {
43024 0 : ae_frame_leave(_state);
43025 0 : return;
43026 : }
43027 : }
43028 0 : if( ((ae_fp_greater_eq(rcomplexity,spawnlevel(_state))&&idx1-idx0>=2*clustering_kmeansblocksize)&&nvars>=clustering_kmeansparalleldim)&&cidx1-cidx0>=clustering_kmeansparallelk )
43029 : {
43030 0 : splitlength(idx1-idx0, clustering_kmeansblocksize, &task0, &task1, _state);
43031 0 : kmeansupdatedistances(xy, idx0, idx0+task0, nvars, ct, cidx0, cidx1, xyc, xydist2, bufferpool, _state);
43032 0 : kmeansupdatedistances(xy, idx0+task0, idx1, nvars, ct, cidx0, cidx1, xyc, xydist2, bufferpool, _state);
43033 0 : ae_frame_leave(_state);
43034 0 : return;
43035 : }
43036 :
43037 : /*
43038 : * Dataset chunk is selected.
43039 : *
43040 : * Process it with blocked algorithm:
43041 : * * iterate over points, process them in KMeansBlockSize-ed chunks
43042 : * * for each chunk of dataset, iterate over centers, process them in KMeansBlockSize-ed chunks
43043 : * * for each chunk of dataset/centerset, iterate over variables, process them in KMeansBlockSize-ed chunks
43044 : */
43045 0 : ae_assert(clustering_kmeansblocksize%2==0, "KMeansUpdateDistances: internal error", _state);
43046 0 : ae_shared_pool_retrieve(bufferpool, &_buf, _state);
43047 0 : rvectorsetlengthatleast(&buf->ra0, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
43048 0 : rvectorsetlengthatleast(&buf->ra1, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
43049 0 : rvectorsetlengthatleast(&buf->ra2, clustering_kmeansblocksize*clustering_kmeansblocksize, _state);
43050 0 : rvectorsetlengthatleast(&buf->ra3, clustering_kmeansblocksize, _state);
43051 0 : ivectorsetlengthatleast(&buf->ia3, clustering_kmeansblocksize, _state);
43052 0 : pblkcnt = chunkscount(idx1-idx0, clustering_kmeansblocksize, _state);
43053 0 : cblkcnt = chunkscount(cidx1-cidx0, clustering_kmeansblocksize, _state);
43054 0 : vblkcnt = chunkscount(nvars, clustering_kmeansblocksize, _state);
43055 0 : for(pblk=0; pblk<=pblkcnt-1; pblk++)
43056 : {
43057 :
43058 : /*
43059 : * Process PBlk-th chunk of dataset.
43060 : */
43061 0 : p0 = idx0+pblk*clustering_kmeansblocksize;
43062 0 : p1 = ae_minint(p0+clustering_kmeansblocksize, idx1, _state);
43063 :
43064 : /*
43065 : * Prepare RA3[]/IA3[] for storage of best distances and best cluster numbers.
43066 : */
43067 0 : for(i=0; i<=clustering_kmeansblocksize-1; i++)
43068 : {
43069 0 : buf->ra3.ptr.p_double[i] = ae_maxrealnumber;
43070 0 : buf->ia3.ptr.p_int[i] = -1;
43071 : }
43072 :
43073 : /*
43074 : * Iterate over chunks of the centerset.
43075 : */
43076 0 : for(cblk=0; cblk<=cblkcnt-1; cblk++)
43077 : {
43078 :
43079 : /*
43080 : * Process CBlk-th chunk of centerset
43081 : */
43082 0 : c0 = cidx0+cblk*clustering_kmeansblocksize;
43083 0 : c1 = ae_minint(c0+clustering_kmeansblocksize, cidx1, _state);
43084 :
43085 : /*
43086 : * At this point we have to calculate a set of pairwise distances
43087 : * between points [P0,P1) and centers [C0,C1) and select best center
43088 : * for each point. It can also be done with blocked algorithm
43089 : * (blocking for variables).
43090 : *
43091 : * Following arrays are used:
43092 : * * RA0[] - matrix of distances, padded by zeros for even size,
43093 : * rows are stored with stride KMeansBlockSize.
43094 : * * RA1[] - matrix of points (variables corresponding to current
43095 : * block are extracted), padded by zeros for even size,
43096 : * rows are stored with stride KMeansBlockSize.
43097 : * * RA2[] - matrix of centers (variables corresponding to current
43098 : * block are extracted), padded by zeros for even size,
43099 : * rows are stored with stride KMeansBlockSize.
43100 : *
43101 : */
43102 0 : pcnt = p1-p0;
43103 0 : pcntpadded = pcnt+pcnt%2;
43104 0 : ccnt = c1-c0;
43105 0 : ccntpadded = ccnt+ccnt%2;
43106 0 : stride = clustering_kmeansblocksize;
43107 0 : ae_assert(pcntpadded<=clustering_kmeansblocksize, "KMeansUpdateDistances: integrity error", _state);
43108 0 : ae_assert(ccntpadded<=clustering_kmeansblocksize, "KMeansUpdateDistances: integrity error", _state);
43109 0 : for(i=0; i<=pcntpadded-1; i++)
43110 : {
43111 0 : for(j=0; j<=ccntpadded-1; j++)
43112 : {
43113 0 : buf->ra0.ptr.p_double[i*stride+j] = 0.0;
43114 : }
43115 : }
43116 0 : for(vblk=0; vblk<=vblkcnt-1; vblk++)
43117 : {
43118 :
43119 : /*
43120 : * Fetch VBlk-th block of variables to arrays RA1 (points) and RA2 (centers).
43121 : * Pad points and centers with zeros.
43122 : */
43123 0 : v0 = vblk*clustering_kmeansblocksize;
43124 0 : v1 = ae_minint(v0+clustering_kmeansblocksize, nvars, _state);
43125 0 : vcnt = v1-v0;
43126 0 : for(i=0; i<=pcnt-1; i++)
43127 : {
43128 0 : for(j=0; j<=vcnt-1; j++)
43129 : {
43130 0 : buf->ra1.ptr.p_double[i*stride+j] = xy->ptr.pp_double[p0+i][v0+j];
43131 : }
43132 : }
43133 0 : for(i=pcnt; i<=pcntpadded-1; i++)
43134 : {
43135 0 : for(j=0; j<=vcnt-1; j++)
43136 : {
43137 0 : buf->ra1.ptr.p_double[i*stride+j] = 0.0;
43138 : }
43139 : }
43140 0 : for(i=0; i<=ccnt-1; i++)
43141 : {
43142 0 : for(j=0; j<=vcnt-1; j++)
43143 : {
43144 0 : buf->ra2.ptr.p_double[i*stride+j] = ct->ptr.pp_double[c0+i][v0+j];
43145 : }
43146 : }
43147 0 : for(i=ccnt; i<=ccntpadded-1; i++)
43148 : {
43149 0 : for(j=0; j<=vcnt-1; j++)
43150 : {
43151 0 : buf->ra2.ptr.p_double[i*stride+j] = 0.0;
43152 : }
43153 : }
43154 :
43155 : /*
43156 : * Update distance matrix with sums-of-squared-differences of RA1 and RA2
43157 : */
43158 0 : i0 = 0;
43159 0 : while(i0<pcntpadded)
43160 : {
43161 0 : i1 = 0;
43162 0 : while(i1<ccntpadded)
43163 : {
43164 0 : offs0 = i0*stride+i1;
43165 0 : v00 = buf->ra0.ptr.p_double[offs0];
43166 0 : v01 = buf->ra0.ptr.p_double[offs0+1];
43167 0 : v10 = buf->ra0.ptr.p_double[offs0+stride];
43168 0 : v11 = buf->ra0.ptr.p_double[offs0+stride+1];
43169 0 : offs00 = i0*stride;
43170 0 : offs01 = offs00+stride;
43171 0 : offs10 = i1*stride;
43172 0 : offs11 = offs10+stride;
43173 0 : for(j=0; j<=vcnt-1; j++)
43174 : {
43175 0 : vp0 = buf->ra1.ptr.p_double[offs00+j];
43176 0 : vp1 = buf->ra1.ptr.p_double[offs01+j];
43177 0 : vc0 = buf->ra2.ptr.p_double[offs10+j];
43178 0 : vc1 = buf->ra2.ptr.p_double[offs11+j];
43179 0 : vv = vp0-vc0;
43180 0 : v00 = v00+vv*vv;
43181 0 : vv = vp0-vc1;
43182 0 : v01 = v01+vv*vv;
43183 0 : vv = vp1-vc0;
43184 0 : v10 = v10+vv*vv;
43185 0 : vv = vp1-vc1;
43186 0 : v11 = v11+vv*vv;
43187 : }
43188 0 : offs0 = i0*stride+i1;
43189 0 : buf->ra0.ptr.p_double[offs0] = v00;
43190 0 : buf->ra0.ptr.p_double[offs0+1] = v01;
43191 0 : buf->ra0.ptr.p_double[offs0+stride] = v10;
43192 0 : buf->ra0.ptr.p_double[offs0+stride+1] = v11;
43193 0 : i1 = i1+2;
43194 : }
43195 0 : i0 = i0+2;
43196 : }
43197 : }
43198 0 : for(i=0; i<=pcnt-1; i++)
43199 : {
43200 0 : cclosest = buf->ia3.ptr.p_int[i];
43201 0 : dclosest = buf->ra3.ptr.p_double[i];
43202 0 : for(j=0; j<=ccnt-1; j++)
43203 : {
43204 0 : if( ae_fp_less(buf->ra0.ptr.p_double[i*stride+j],dclosest) )
43205 : {
43206 0 : dclosest = buf->ra0.ptr.p_double[i*stride+j];
43207 0 : cclosest = c0+j;
43208 : }
43209 : }
43210 0 : buf->ia3.ptr.p_int[i] = cclosest;
43211 0 : buf->ra3.ptr.p_double[i] = dclosest;
43212 : }
43213 : }
43214 :
43215 : /*
43216 : * Store best centers to XYC[]
43217 : */
43218 0 : for(i=p0; i<=p1-1; i++)
43219 : {
43220 0 : xyc->ptr.p_int[i] = buf->ia3.ptr.p_int[i-p0];
43221 0 : xydist2->ptr.p_double[i] = buf->ra3.ptr.p_double[i-p0];
43222 : }
43223 : }
43224 0 : ae_shared_pool_recycle(bufferpool, &_buf, _state);
43225 0 : ae_frame_leave(_state);
43226 : }
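/*************************************************************************
Added commentary: the innermost distance update above is manually
register-blocked. Points and centers are processed in 2x2 tiles - V00,
V01, V10 and V11 accumulate the four pairwise squared distances of a
tile - which is why the point and center counts are padded with zero
rows to even sizes (PCntPadded/CCntPadded) before the tile loops run.
*************************************************************************/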
43227 :
43228 :
43229 : /*************************************************************************
43230 : Serial stub for GPL edition.
43231 : *************************************************************************/
43232 0 : ae_bool _trypexec_kmeansupdatedistances(/* Real */ ae_matrix* xy,
43233 : ae_int_t idx0,
43234 : ae_int_t idx1,
43235 : ae_int_t nvars,
43236 : /* Real */ ae_matrix* ct,
43237 : ae_int_t cidx0,
43238 : ae_int_t cidx1,
43239 : /* Integer */ ae_vector* xyc,
43240 : /* Real */ ae_vector* xydist2,
43241 : ae_shared_pool* bufferpool,
43242 : ae_state *_state)
43243 : {
43244 0 : return ae_false;
43245 : }
43246 :
43247 :
43248 : /*************************************************************************
43249 : This function selects initial centers according to specified initialization
43250 : algorithm.
43251 :
43252 : IMPORTANT: this function provides no guarantees regarding selection of
43253 : DIFFERENT centers. Centers returned by this function may
43254 : include duplicates (say, when random sampling is used). It is
43255 : also possible that some centers correspond to empty clusters.
43256 : The algorithm which uses this function must be able to deal with
43257 : this; say, you may want to use FixCenters() to fix empty centers.
43258 :
43259 : INPUT PARAMETERS:
43260 : XY - dataset, array [0..NPoints-1,0..NVars-1].
43261 : NPoints - points count
43262 : NVars - number of variables, NVars>=1
43263 : InitAlgo - initialization algorithm:
43264 : * 0 - automatic selection of best algorithm
43265 : * 1 - random selection
43266 : * 2 - k-means++
43267 : * 3 - fast-greedy init
43268 : *-1 - first K rows of dataset are used (debug algorithm)
43269 : RS - RNG used to select centers
43270 : K - number of centers, K>=1
43271 : CT - possibly preallocated output buffer, resized if needed
43272 : InitBuf - internal buffer, possibly an uninitialized instance of
43273 : APBuffers. It is recommended to use this instance only
43274 : with the SelectInitialCenters() and FixCenters() functions,
43275 : because these functions may allocate really large storage.
43276 : UpdatePool - shared pool seeded with an instance of APBuffers structure
43277 : (seed instance can be uninitialized). Used internally by the
43278 : KMeansUpdateDistances() function. It is recommended
43279 : to use this pool ONLY with KMeansUpdateDistances()
43280 : function.
43281 :
43282 : OUTPUT PARAMETERS:
43283 : CT - set of K clusters, one per row
43284 :
43285 : RESULT:
43286 : True on success, False on failure (impossible to create K independent clusters)
43287 :
43288 : -- ALGLIB --
43289 : Copyright 21.01.2015 by Bochkanov Sergey
43290 : *************************************************************************/
43291 0 : static void clustering_selectinitialcenters(/* Real */ ae_matrix* xy,
43292 : ae_int_t npoints,
43293 : ae_int_t nvars,
43294 : ae_int_t initalgo,
43295 : hqrndstate* rs,
43296 : ae_int_t k,
43297 : /* Real */ ae_matrix* ct,
43298 : apbuffers* initbuf,
43299 : ae_shared_pool* updatepool,
43300 : ae_state *_state)
43301 : {
43302 : ae_int_t cidx;
43303 : ae_int_t i;
43304 : ae_int_t j;
43305 : double v;
43306 : double vv;
43307 : double s;
43308 : ae_int_t lastnz;
43309 : ae_int_t ptidx;
43310 : ae_int_t samplesize;
43311 : ae_int_t samplescntnew;
43312 : ae_int_t samplescntall;
43313 : double samplescale;
43314 :
43315 :
43316 :
43317 : /*
43318 : * Check parameters
43319 : */
43320 0 : ae_assert(npoints>0, "SelectInitialCenters: internal error", _state);
43321 0 : ae_assert(nvars>0, "SelectInitialCenters: internal error", _state);
43322 0 : ae_assert(k>0, "SelectInitialCenters: internal error", _state);
43323 0 : if( initalgo==0 )
43324 : {
43325 0 : initalgo = 3;
43326 : }
43327 0 : rmatrixsetlengthatleast(ct, k, nvars, _state);
43328 :
43329 : /*
43330 : * Debug initialization - use first K rows of the dataset
43331 : */
43332 0 : if( initalgo==-1 )
43333 : {
43334 0 : for(i=0; i<=k-1; i++)
43335 : {
43336 0 : ae_v_move(&ct->ptr.pp_double[i][0], 1, &xy->ptr.pp_double[i%npoints][0], 1, ae_v_len(0,nvars-1));
43337 : }
43338 0 : return;
43339 : }
43340 :
43341 : /*
43342 : * Random initialization
43343 : */
43344 0 : if( initalgo==1 )
43345 : {
43346 0 : for(i=0; i<=k-1; i++)
43347 : {
43348 0 : j = hqrnduniformi(rs, npoints, _state);
43349 0 : ae_v_move(&ct->ptr.pp_double[i][0], 1, &xy->ptr.pp_double[j][0], 1, ae_v_len(0,nvars-1));
43350 : }
43351 0 : return;
43352 : }
43353 :
43354 : /*
43355 : * k-means++ initialization
43356 : */
43357 0 : if( initalgo==2 )
43358 : {
43359 :
43360 : /*
43361 : * Prepare distances array.
43362 : * Select initial center at random.
43363 : */
43364 0 : rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
43365 0 : for(i=0; i<=npoints-1; i++)
43366 : {
43367 0 : initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
43368 : }
43369 0 : ptidx = hqrnduniformi(rs, npoints, _state);
43370 0 : ae_v_move(&ct->ptr.pp_double[0][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43371 :
43372 : /*
43373 : * For each newly added center repeat:
43374 : * * reevaluate distances from points to best centers
43375 : * * sample points with probability dependent on distance
43376 : * * add new center
43377 : */
43378 0 : for(cidx=0; cidx<=k-2; cidx++)
43379 : {
43380 :
43381 : /*
43382 : * Reevaluate distances
43383 : */
43384 0 : s = 0.0;
43385 0 : for(i=0; i<=npoints-1; i++)
43386 : {
43387 0 : v = 0.0;
43388 0 : for(j=0; j<=nvars-1; j++)
43389 : {
43390 0 : vv = xy->ptr.pp_double[i][j]-ct->ptr.pp_double[cidx][j];
43391 0 : v = v+vv*vv;
43392 : }
43393 0 : if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
43394 : {
43395 0 : initbuf->ra0.ptr.p_double[i] = v;
43396 : }
43397 0 : s = s+initbuf->ra0.ptr.p_double[i];
43398 : }
43399 :
43400 : /*
43401 : * If all distances are zero, it means that we cannot find enough
43402 : * distinct points. In this case we just select non-distinct center
43403 : * at random and continue iterations. This issue will be handled
43404 : * later in the FixCenters() function.
43405 : */
43406 0 : if( ae_fp_eq(s,0.0) )
43407 : {
43408 0 : ptidx = hqrnduniformi(rs, npoints, _state);
43409 0 : ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43410 0 : continue;
43411 : }
43412 :
43413 : /*
43414 : * Select point as center using its distance.
43415 : * We also handle the situation when, because of rounding errors,
43416 : * no point was selected - in this case, the last non-zero one
43417 : * will be used.
43418 : */
43419 0 : v = hqrnduniformr(rs, _state);
43420 0 : vv = 0.0;
43421 0 : lastnz = -1;
43422 0 : ptidx = -1;
43423 0 : for(i=0; i<=npoints-1; i++)
43424 : {
43425 0 : if( ae_fp_eq(initbuf->ra0.ptr.p_double[i],0.0) )
43426 : {
43427 0 : continue;
43428 : }
43429 0 : lastnz = i;
43430 0 : vv = vv+initbuf->ra0.ptr.p_double[i];
43431 0 : if( ae_fp_less_eq(v,vv/s) )
43432 : {
43433 0 : ptidx = i;
43434 0 : break;
43435 : }
43436 : }
43437 0 : ae_assert(lastnz>=0, "SelectInitialCenters: integrity error", _state);
43438 0 : if( ptidx<0 )
43439 : {
43440 0 : ptidx = lastnz;
43441 : }
43442 0 : ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43443 : }
43444 0 : return;
43445 : }
43446 :
43447 : /*
43448 : * "Fast-greedy" algorithm based on "Scalable k-means++".
43449 : *
43450 : * We perform several rounds, within each round we sample about 0.5*K points
43451 : * (not exactly 0.5*K) until we have 2*K points sampled. Before each round
43452 : * we calculate distances from dataset points to closest points sampled so far.
43453 : * We sample dataset points independently, using (distance times 0.5*K divided
43454 : * by total distance) as probability (similar to k-means++, but each point is
43455 : * sampled independently; after each round roughly 0.5*K points are added).
43456 : *
43457 : * After sampling is done, we run the "greedy" version of k-means++ on this
43458 : * subsample, which selects the most distant point on every round.
43459 : */
43460 0 : if( initalgo==3 )
43461 : {
43462 :
43463 : /*
43464 : * Prepare arrays.
43465 : * Select initial center at random, add it to "new" part of sample,
43466 : * which is stored at the beginning of the array
43467 : */
43468 0 : samplesize = 2*k;
43469 0 : samplescale = 0.5*k;
43470 0 : rmatrixsetlengthatleast(&initbuf->rm0, samplesize, nvars, _state);
43471 0 : ptidx = hqrnduniformi(rs, npoints, _state);
43472 0 : ae_v_move(&initbuf->rm0.ptr.pp_double[0][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43473 0 : samplescntnew = 1;
43474 0 : samplescntall = 1;
43475 0 : rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
43476 0 : rvectorsetlengthatleast(&initbuf->ra1, npoints, _state);
43477 0 : ivectorsetlengthatleast(&initbuf->ia1, npoints, _state);
43478 0 : for(i=0; i<=npoints-1; i++)
43479 : {
43480 0 : initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
43481 : }
43482 :
43483 : /*
43484 : * Repeat until samples count is 2*K
43485 : */
43486 0 : while(samplescntall<samplesize)
43487 : {
43488 :
43489 : /*
43490 : * Evaluate distances from points to NEW centers, store to RA1.
43491 : * Reset counter of "new" centers.
43492 : */
43493 0 : kmeansupdatedistances(xy, 0, npoints, nvars, &initbuf->rm0, samplescntall-samplescntnew, samplescntall, &initbuf->ia1, &initbuf->ra1, updatepool, _state);
43494 0 : samplescntnew = 0;
43495 :
43496 : /*
43497 : * Merge new distances with old ones.
43498 : * Calculate sum of distances, if sum is exactly zero - fill sample
43499 : * by randomly selected points and terminate.
43500 : */
43501 0 : s = 0.0;
43502 0 : for(i=0; i<=npoints-1; i++)
43503 : {
43504 0 : initbuf->ra0.ptr.p_double[i] = ae_minreal(initbuf->ra0.ptr.p_double[i], initbuf->ra1.ptr.p_double[i], _state);
43505 0 : s = s+initbuf->ra0.ptr.p_double[i];
43506 : }
43507 0 : if( ae_fp_eq(s,0.0) )
43508 : {
43509 0 : while(samplescntall<samplesize)
43510 : {
43511 0 : ptidx = hqrnduniformi(rs, npoints, _state);
43512 0 : ae_v_move(&initbuf->rm0.ptr.pp_double[samplescntall][0], 1, &xy->ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43513 0 : inc(&samplescntall, _state);
43514 0 : inc(&samplescntnew, _state);
43515 : }
43516 0 : break;
43517 : }
43518 :
43519 : /*
43520 : * Sample points independently.
43521 : */
43522 0 : for(i=0; i<=npoints-1; i++)
43523 : {
43524 0 : if( samplescntall==samplesize )
43525 : {
43526 0 : break;
43527 : }
43528 0 : if( ae_fp_eq(initbuf->ra0.ptr.p_double[i],0.0) )
43529 : {
43530 0 : continue;
43531 : }
43532 0 : if( ae_fp_less_eq(hqrnduniformr(rs, _state),samplescale*initbuf->ra0.ptr.p_double[i]/s) )
43533 : {
43534 0 : ae_v_move(&initbuf->rm0.ptr.pp_double[samplescntall][0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,nvars-1));
43535 0 : inc(&samplescntall, _state);
43536 0 : inc(&samplescntnew, _state);
43537 : }
43538 : }
43539 : }
43540 :
43541 : /*
43542 : * Run the greedy version of k-means++ on the sampled points
43543 : */
43544 0 : rvectorsetlengthatleast(&initbuf->ra0, samplescntall, _state);
43545 0 : for(i=0; i<=samplescntall-1; i++)
43546 : {
43547 0 : initbuf->ra0.ptr.p_double[i] = ae_maxrealnumber;
43548 : }
43549 0 : ptidx = hqrnduniformi(rs, samplescntall, _state);
43550 0 : ae_v_move(&ct->ptr.pp_double[0][0], 1, &initbuf->rm0.ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43551 0 : for(cidx=0; cidx<=k-2; cidx++)
43552 : {
43553 :
43554 : /*
43555 : * Reevaluate distances
43556 : */
43557 0 : for(i=0; i<=samplescntall-1; i++)
43558 : {
43559 0 : v = 0.0;
43560 0 : for(j=0; j<=nvars-1; j++)
43561 : {
43562 0 : vv = initbuf->rm0.ptr.pp_double[i][j]-ct->ptr.pp_double[cidx][j];
43563 0 : v = v+vv*vv;
43564 : }
43565 0 : if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
43566 : {
43567 0 : initbuf->ra0.ptr.p_double[i] = v;
43568 : }
43569 : }
43570 :
43571 : /*
43572 : * Select point as center in greedy manner - most distant
43573 : * point is selected.
43574 : */
43575 0 : ptidx = 0;
43576 0 : for(i=0; i<=samplescntall-1; i++)
43577 : {
43578 0 : if( ae_fp_greater(initbuf->ra0.ptr.p_double[i],initbuf->ra0.ptr.p_double[ptidx]) )
43579 : {
43580 0 : ptidx = i;
43581 : }
43582 : }
43583 0 : ae_v_move(&ct->ptr.pp_double[cidx+1][0], 1, &initbuf->rm0.ptr.pp_double[ptidx][0], 1, ae_v_len(0,nvars-1));
43584 : }
43585 0 : return;
43586 : }
43587 :
43588 : /*
43589 : * Internal error
43590 : */
43591 0 : ae_assert(ae_false, "SelectInitialCenters: internal error", _state);
43592 : }
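
/*
 * Illustrative sketch (not part of the ALGLIB listing): the k-means++
 * D^2-sampling step implemented above, restated with plain C arrays and the
 * standard-library RNG instead of ALGLIB's hqrnd/apbuffers machinery. The
 * helper name and its interface are hypothetical.
 */
#include <stdlib.h>

/* Given squared distances from each point to its closest already-chosen
   center, pick the next center index with probability proportional to
   dist2[i]. Returns -1 when all distances are zero (no distinct point
   remains), which a caller would handle by a plain random pick. */
static int pick_next_center(const double *dist2, int npoints)
{
    double s = 0.0, u, acc = 0.0;
    int i, lastnz = -1;
    for(i=0; i<npoints; i++)
        s += dist2[i];
    if( s==0.0 )
        return -1;
    u = (double)rand()/((double)RAND_MAX+1.0);
    for(i=0; i<npoints; i++)
    {
        if( dist2[i]==0.0 )
            continue;
        lastnz = i;
        acc += dist2[i];
        if( u<=acc/s )
            return i;
    }
    return lastnz; /* rounding-error safeguard, same as the loop above */
}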
43593 :
43594 :
43595 : /*************************************************************************
43596 : This function "fixes" centers, i.e. replaces centers which have no points
43597 : assigned to them by new centers which have at least one point. If it is impossible
43598 : to fix centers (not enough distinct points in the dataset), this function
43599 : returns False.
43600 :
43601 : INPUT PARAMETERS:
43602 : XY - dataset, array [0..NPoints-1,0..NVars-1].
43603 : NPoints - points count, >=1
43604 : NVars - number of variables, NVars>=1
43605 : CT - centers
43606 : K - number of centers, K>=1
43607 : InitBuf - internal buffer, possibly uninitialized instance of
43608 : APBuffers. It is recommended to use this instance only
43609 : with SelectInitialCenters() and FixCenters() functions,
43610 : because these functions may allocate really large storage.
43611 : UpdatePool - shared pool seeded with instance of APBuffers structure
43612 : (seed instance can be uninitialized). Used internally with
43613 : KMeansUpdateDistances() function. It is recommended
43614 : to use this pool ONLY with KMeansUpdateDistances()
43615 : function.
43616 :
43617 : OUTPUT PARAMETERS:
43618 : CT - set of K centers, one per row
43619 :
43620 : RESULT:
43621 : True on success, False on failure (impossible to create K independent clusters)
43622 :
43623 : -- ALGLIB --
43624 : Copyright 21.01.2015 by Bochkanov Sergey
43625 : *************************************************************************/
43626 0 : static ae_bool clustering_fixcenters(/* Real */ ae_matrix* xy,
43627 : ae_int_t npoints,
43628 : ae_int_t nvars,
43629 : /* Real */ ae_matrix* ct,
43630 : ae_int_t k,
43631 : apbuffers* initbuf,
43632 : ae_shared_pool* updatepool,
43633 : ae_state *_state)
43634 : {
43635 : ae_int_t fixiteration;
43636 : ae_int_t centertofix;
43637 : ae_int_t i;
43638 : ae_int_t j;
43639 : ae_int_t pdistant;
43640 : double ddistant;
43641 : double v;
43642 : ae_bool result;
43643 :
43644 :
43645 0 : ae_assert(npoints>=1, "FixCenters: internal error", _state);
43646 0 : ae_assert(nvars>=1, "FixCenters: internal error", _state);
43647 0 : ae_assert(k>=1, "FixCenters: internal error", _state);
43648 :
43649 : /*
43650 : * Calculate distances from points to best centers (RA0)
43651 : * and best center indexes (IA0)
43652 : */
43653 0 : ivectorsetlengthatleast(&initbuf->ia0, npoints, _state);
43654 0 : rvectorsetlengthatleast(&initbuf->ra0, npoints, _state);
43655 0 : kmeansupdatedistances(xy, 0, npoints, nvars, ct, 0, k, &initbuf->ia0, &initbuf->ra0, updatepool, _state);
43656 :
43657 : /*
43658 : * Repeat loop:
43659 : * * find first center which has no corresponding point
43660 : * * set it to the point which is most distant from its closest center
43661 : * * recalculate distances, update IA0/RA0
43662 : * * repeat
43663 : *
43664 : * Loop is repeated for at most 2*K iterations. It is stopped once we have
43665 : * no "empty" clusters.
43666 : */
43667 0 : bvectorsetlengthatleast(&initbuf->ba0, k, _state);
43668 0 : for(fixiteration=0; fixiteration<=2*k; fixiteration++)
43669 : {
43670 :
43671 : /*
43672 : * Select center to fix (one which is not mentioned in IA0),
43673 : * terminate if there is no such center.
43674 : * BA0[] stores True for centers which have at least one point.
43675 : */
43676 0 : for(i=0; i<=k-1; i++)
43677 : {
43678 0 : initbuf->ba0.ptr.p_bool[i] = ae_false;
43679 : }
43680 0 : for(i=0; i<=npoints-1; i++)
43681 : {
43682 0 : initbuf->ba0.ptr.p_bool[initbuf->ia0.ptr.p_int[i]] = ae_true;
43683 : }
43684 0 : centertofix = -1;
43685 0 : for(i=0; i<=k-1; i++)
43686 : {
43687 0 : if( !initbuf->ba0.ptr.p_bool[i] )
43688 : {
43689 0 : centertofix = i;
43690 0 : break;
43691 : }
43692 : }
43693 0 : if( centertofix<0 )
43694 : {
43695 0 : result = ae_true;
43696 0 : return result;
43697 : }
43698 :
43699 : /*
43700 : * Replace center to fix by the most distant point.
43701 : * Update IA0/RA0
43702 : */
43703 0 : pdistant = 0;
43704 0 : ddistant = initbuf->ra0.ptr.p_double[pdistant];
43705 0 : for(i=0; i<=npoints-1; i++)
43706 : {
43707 0 : if( ae_fp_greater(initbuf->ra0.ptr.p_double[i],ddistant) )
43708 : {
43709 0 : ddistant = initbuf->ra0.ptr.p_double[i];
43710 0 : pdistant = i;
43711 : }
43712 : }
43713 0 : if( ae_fp_eq(ddistant,0.0) )
43714 : {
43715 0 : break;
43716 : }
43717 0 : ae_v_move(&ct->ptr.pp_double[centertofix][0], 1, &xy->ptr.pp_double[pdistant][0], 1, ae_v_len(0,nvars-1));
43718 0 : for(i=0; i<=npoints-1; i++)
43719 : {
43720 0 : v = 0.0;
43721 0 : for(j=0; j<=nvars-1; j++)
43722 : {
43723 0 : v = v+ae_sqr(xy->ptr.pp_double[i][j]-ct->ptr.pp_double[centertofix][j], _state);
43724 : }
43725 0 : if( ae_fp_less(v,initbuf->ra0.ptr.p_double[i]) )
43726 : {
43727 0 : initbuf->ra0.ptr.p_double[i] = v;
43728 0 : initbuf->ia0.ptr.p_int[i] = centertofix;
43729 : }
43730 : }
43731 : }
43732 0 : result = ae_false;
43733 0 : return result;
43734 : }
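
/*
 * Illustrative sketch (not part of the ALGLIB listing): the core rule of
 * FixCenters() - an "empty" center is replaced by the dataset point farthest
 * from its closest center. dist2[] is assumed to hold squared distances from
 * points to their closest centers (RA0 above); the helper name is
 * hypothetical.
 */
static int most_distant_point(const double *dist2, int npoints)
{
    int i, best = 0;
    for(i=1; i<npoints; i++)
        if( dist2[i]>dist2[best] )
            best = i;
    return best; /* if dist2[best]==0.0, no distinct replacement exists */
}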
43735 :
43736 :
43737 : /*************************************************************************
43738 : This function performs agglomerative hierarchical clustering using
43739 : precomputed distance matrix. Internal function, should not be called
43740 : directly.
43741 :
43742 : INPUT PARAMETERS:
43743 : S - clusterizer state, initialized by ClusterizerCreate()
43744 : D - distance matrix, array[S.NPoints,S.NPoints].
43745 : Contents of the matrix are destroyed during
43746 : algorithm operation.
43747 :
43748 : OUTPUT PARAMETERS:
43749 : Rep - clustering results; see description of AHCReport
43750 : structure for more information.
43751 :
43752 : -- ALGLIB --
43753 : Copyright 10.07.2012 by Bochkanov Sergey
43754 : *************************************************************************/
43755 0 : static void clustering_clusterizerrunahcinternal(clusterizerstate* s,
43756 : /* Real */ ae_matrix* d,
43757 : ahcreport* rep,
43758 : ae_state *_state)
43759 : {
43760 : ae_frame _frame_block;
43761 : ae_int_t i;
43762 : ae_int_t j;
43763 : ae_int_t k;
43764 : double v;
43765 : ae_int_t mergeidx;
43766 : ae_int_t c0;
43767 : ae_int_t c1;
43768 : ae_int_t s0;
43769 : ae_int_t s1;
43770 : ae_int_t ar;
43771 : ae_int_t br;
43772 : ae_int_t npoints;
43773 : ae_vector cidx;
43774 : ae_vector csizes;
43775 : ae_vector nnidx;
43776 : ae_matrix cinfo;
43777 : ae_int_t n0;
43778 : ae_int_t n1;
43779 : ae_int_t ni;
43780 : double d01;
43781 :
43782 0 : ae_frame_make(_state, &_frame_block);
43783 0 : memset(&cidx, 0, sizeof(cidx));
43784 0 : memset(&csizes, 0, sizeof(csizes));
43785 0 : memset(&nnidx, 0, sizeof(nnidx));
43786 0 : memset(&cinfo, 0, sizeof(cinfo));
43787 0 : ae_vector_init(&cidx, 0, DT_INT, _state, ae_true);
43788 0 : ae_vector_init(&csizes, 0, DT_INT, _state, ae_true);
43789 0 : ae_vector_init(&nnidx, 0, DT_INT, _state, ae_true);
43790 0 : ae_matrix_init(&cinfo, 0, 0, DT_INT, _state, ae_true);
43791 :
43792 0 : npoints = s->npoints;
43793 :
43794 : /*
43795 : * Fill Rep.NPoints, quick exit when NPoints<=1
43796 : */
43797 0 : rep->npoints = npoints;
43798 0 : if( npoints==0 )
43799 : {
43800 0 : ae_vector_set_length(&rep->p, 0, _state);
43801 0 : ae_matrix_set_length(&rep->z, 0, 0, _state);
43802 0 : ae_matrix_set_length(&rep->pz, 0, 0, _state);
43803 0 : ae_matrix_set_length(&rep->pm, 0, 0, _state);
43804 0 : ae_vector_set_length(&rep->mergedist, 0, _state);
43805 0 : rep->terminationtype = 1;
43806 0 : ae_frame_leave(_state);
43807 0 : return;
43808 : }
43809 0 : if( npoints==1 )
43810 : {
43811 0 : ae_vector_set_length(&rep->p, 1, _state);
43812 0 : ae_matrix_set_length(&rep->z, 0, 0, _state);
43813 0 : ae_matrix_set_length(&rep->pz, 0, 0, _state);
43814 0 : ae_matrix_set_length(&rep->pm, 0, 0, _state);
43815 0 : ae_vector_set_length(&rep->mergedist, 0, _state);
43816 0 : rep->p.ptr.p_int[0] = 0;
43817 0 : rep->terminationtype = 1;
43818 0 : ae_frame_leave(_state);
43819 0 : return;
43820 : }
43821 0 : ae_matrix_set_length(&rep->z, npoints-1, 2, _state);
43822 0 : ae_vector_set_length(&rep->mergedist, npoints-1, _state);
43823 0 : rep->terminationtype = 1;
43824 :
43825 : /*
43826 : * Build list of nearest neighbors
43827 : */
43828 0 : ae_vector_set_length(&nnidx, npoints, _state);
43829 0 : for(i=0; i<=npoints-1; i++)
43830 : {
43831 :
43832 : /*
43833 : * Calculate index of the nearest neighbor
43834 : */
43835 0 : k = -1;
43836 0 : v = ae_maxrealnumber;
43837 0 : for(j=0; j<=npoints-1; j++)
43838 : {
43839 0 : if( j!=i&&ae_fp_less(d->ptr.pp_double[i][j],v) )
43840 : {
43841 0 : k = j;
43842 0 : v = d->ptr.pp_double[i][j];
43843 : }
43844 : }
43845 0 : ae_assert(ae_fp_less(v,ae_maxrealnumber), "ClusterizerRunAHC: internal error", _state);
43846 0 : nnidx.ptr.p_int[i] = k;
43847 : }
43848 :
43849 : /*
43850 : * For AHCAlgo=4 (Ward's method) replace distances by their squares times 0.5
43851 : */
43852 0 : if( s->ahcalgo==4 )
43853 : {
43854 0 : for(i=0; i<=npoints-1; i++)
43855 : {
43856 0 : for(j=0; j<=npoints-1; j++)
43857 : {
43858 0 : d->ptr.pp_double[i][j] = 0.5*d->ptr.pp_double[i][j]*d->ptr.pp_double[i][j];
43859 : }
43860 : }
43861 : }
43862 :
43863 : /*
43864 : * Distance matrix is built, perform merges.
43865 : *
43866 : * NOTE 1: CIdx is array[NPoints] which maps rows/columns of the
43867 : * distance matrix D to indexes of clusters. Values of CIdx
43868 : * from [0,NPoints) denote single-point clusters, and values
43869 : * from [NPoints,2*NPoints-1) denote ones obtained by merging
43870 : * smaller clusters. Negative values correspond to absent clusters.
43871 : *
43872 : * Initially it contains [0...NPoints-1], after each merge
43873 : * one element of CIdx (one with index C0) is replaced by
43874 : * NPoints+MergeIdx, and another one, with index C1, is
43875 : * overwritten with -1.
43876 : *
43877 : * NOTE 2: CSizes is array[NPoints] which stores sizes of clusters.
43878 : *
43879 : */
43880 0 : ae_vector_set_length(&cidx, npoints, _state);
43881 0 : ae_vector_set_length(&csizes, npoints, _state);
43882 0 : for(i=0; i<=npoints-1; i++)
43883 : {
43884 0 : cidx.ptr.p_int[i] = i;
43885 0 : csizes.ptr.p_int[i] = 1;
43886 : }
43887 0 : for(mergeidx=0; mergeidx<=npoints-2; mergeidx++)
43888 : {
43889 :
43890 : /*
43891 : * Select pair of clusters (C0,C1) with CIdx[C0]<CIdx[C1] to merge.
43892 : */
43893 0 : c0 = -1;
43894 0 : c1 = -1;
43895 0 : d01 = ae_maxrealnumber;
43896 0 : for(i=0; i<=npoints-1; i++)
43897 : {
43898 0 : if( cidx.ptr.p_int[i]>=0 )
43899 : {
43900 0 : if( ae_fp_less(d->ptr.pp_double[i][nnidx.ptr.p_int[i]],d01) )
43901 : {
43902 0 : c0 = i;
43903 0 : c1 = nnidx.ptr.p_int[i];
43904 0 : d01 = d->ptr.pp_double[i][nnidx.ptr.p_int[i]];
43905 : }
43906 : }
43907 : }
43908 0 : ae_assert(ae_fp_less(d01,ae_maxrealnumber), "ClusterizerRunAHC: internal error", _state);
43909 0 : if( cidx.ptr.p_int[c0]>cidx.ptr.p_int[c1] )
43910 : {
43911 0 : i = c1;
43912 0 : c1 = c0;
43913 0 : c0 = i;
43914 : }
43915 :
43916 : /*
43917 : * Fill one row of Rep.Z and one element of Rep.MergeDist
43918 : */
43919 0 : rep->z.ptr.pp_int[mergeidx][0] = cidx.ptr.p_int[c0];
43920 0 : rep->z.ptr.pp_int[mergeidx][1] = cidx.ptr.p_int[c1];
43921 0 : rep->mergedist.ptr.p_double[mergeidx] = d01;
43922 :
43923 : /*
43924 : * Update distance matrix:
43925 : * * row/column C0 are updated by distances to the new cluster
43926 : * * row/column C1 are considered empty (we can fill them by zeros,
43927 : * but do not want to spend time - we just ignore them)
43928 : *
43929 : * NOTE: it is important to update distance matrix BEFORE CIdx/CSizes
43930 : * are updated.
43931 : */
43932 0 : ae_assert((((s->ahcalgo==0||s->ahcalgo==1)||s->ahcalgo==2)||s->ahcalgo==3)||s->ahcalgo==4, "ClusterizerRunAHC: internal error", _state);
43933 0 : for(i=0; i<=npoints-1; i++)
43934 : {
43935 0 : if( i!=c0&&i!=c1 )
43936 : {
43937 0 : n0 = csizes.ptr.p_int[c0];
43938 0 : n1 = csizes.ptr.p_int[c1];
43939 0 : ni = csizes.ptr.p_int[i];
43940 0 : if( s->ahcalgo==0 )
43941 : {
43942 0 : d->ptr.pp_double[i][c0] = ae_maxreal(d->ptr.pp_double[i][c0], d->ptr.pp_double[i][c1], _state);
43943 : }
43944 0 : if( s->ahcalgo==1 )
43945 : {
43946 0 : d->ptr.pp_double[i][c0] = ae_minreal(d->ptr.pp_double[i][c0], d->ptr.pp_double[i][c1], _state);
43947 : }
43948 0 : if( s->ahcalgo==2 )
43949 : {
43950 0 : d->ptr.pp_double[i][c0] = (csizes.ptr.p_int[c0]*d->ptr.pp_double[i][c0]+csizes.ptr.p_int[c1]*d->ptr.pp_double[i][c1])/(csizes.ptr.p_int[c0]+csizes.ptr.p_int[c1]);
43951 : }
43952 0 : if( s->ahcalgo==3 )
43953 : {
43954 0 : d->ptr.pp_double[i][c0] = (d->ptr.pp_double[i][c0]+d->ptr.pp_double[i][c1])/2;
43955 : }
43956 0 : if( s->ahcalgo==4 )
43957 : {
43958 0 : d->ptr.pp_double[i][c0] = ((n0+ni)*d->ptr.pp_double[i][c0]+(n1+ni)*d->ptr.pp_double[i][c1]-ni*d01)/(n0+n1+ni);
43959 : }
43960 0 : d->ptr.pp_double[c0][i] = d->ptr.pp_double[i][c0];
43961 : }
43962 : }
43963 :
43964 : /*
43965 : * Update CIdx and CSizes
43966 : */
43967 0 : cidx.ptr.p_int[c0] = npoints+mergeidx;
43968 0 : cidx.ptr.p_int[c1] = -1;
43969 0 : csizes.ptr.p_int[c0] = csizes.ptr.p_int[c0]+csizes.ptr.p_int[c1];
43970 0 : csizes.ptr.p_int[c1] = 0;
43971 :
43972 : /*
43973 : * Update nearest neighbors array:
43974 : * * update nearest neighbors of everything except for C0/C1
43975 : * * update neighbors of C0/C1
43976 : */
43977 0 : for(i=0; i<=npoints-1; i++)
43978 : {
43979 0 : if( (cidx.ptr.p_int[i]>=0&&i!=c0)&&(nnidx.ptr.p_int[i]==c0||nnidx.ptr.p_int[i]==c1) )
43980 : {
43981 :
43982 : /*
43983 : * I-th cluster which is distinct from C0/C1 has former C0/C1 cluster as its nearest
43984 : * neighbor. We handle this issue depending on specific AHC algorithm being used.
43985 : */
43986 0 : if( s->ahcalgo==1 )
43987 : {
43988 :
43989 : /*
43990 : * Single linkage. Merging of two clusters together
43991 : * does NOT change distances between new cluster and
43992 : * other clusters.
43993 : *
43994 : * The only thing we have to do is to update nearest neighbor index
43995 : */
43996 0 : nnidx.ptr.p_int[i] = c0;
43997 : }
43998 : else
43999 : {
44000 :
44001 : /*
44002 : * Something other than single linkage. We have to re-examine
44003 : * the whole row to find the nearest neighbor.
44004 : */
44005 0 : k = -1;
44006 0 : v = ae_maxrealnumber;
44007 0 : for(j=0; j<=npoints-1; j++)
44008 : {
44009 0 : if( (cidx.ptr.p_int[j]>=0&&j!=i)&&ae_fp_less(d->ptr.pp_double[i][j],v) )
44010 : {
44011 0 : k = j;
44012 0 : v = d->ptr.pp_double[i][j];
44013 : }
44014 : }
44015 0 : ae_assert(ae_fp_less(v,ae_maxrealnumber)||mergeidx==npoints-2, "ClusterizerRunAHC: internal error", _state);
44016 0 : nnidx.ptr.p_int[i] = k;
44017 : }
44018 : }
44019 : }
44020 0 : k = -1;
44021 0 : v = ae_maxrealnumber;
44022 0 : for(j=0; j<=npoints-1; j++)
44023 : {
44024 0 : if( (cidx.ptr.p_int[j]>=0&&j!=c0)&&ae_fp_less(d->ptr.pp_double[c0][j],v) )
44025 : {
44026 0 : k = j;
44027 0 : v = d->ptr.pp_double[c0][j];
44028 : }
44029 : }
44030 0 : ae_assert(ae_fp_less(v,ae_maxrealnumber)||mergeidx==npoints-2, "ClusterizerRunAHC: internal error", _state);
44031 0 : nnidx.ptr.p_int[c0] = k;
44032 : }
44033 :
44034 : /*
44035 : * Calculate Rep.P and Rep.PM.
44036 : *
44037 : * In order to do that, we fill CInfo matrix - a (2*NPoints-1)*4 matrix,
44038 : * with I-th row containing:
44039 : * * CInfo[I,0] - size of I-th cluster
44040 : * * CInfo[I,1] - beginning of I-th cluster
44041 : * * CInfo[I,2] - end of I-th cluster
44042 : * * CInfo[I,3] - height of I-th cluster
44043 : *
44044 : * We perform it as follows:
44045 : * * first NPoints clusters have unit size (CInfo[I,0]=1) and zero
44046 : * height (CInfo[I,3]=0)
44047 : * * we replay NPoints-1 merges from first to last and fill sizes of
44048 : * corresponding clusters (new size is a sum of sizes of clusters
44049 : * being merged) and height (new height is max(heights)+1).
44050 : * * now we are ready to determine locations of clusters. The last
44051 : * cluster spans the entire dataset. We replay merges from last to
44052 : * first, during each merge we already know location of the merge
44053 : * result, and we can position first cluster to the left part of
44054 : * the result, and second cluster to the right part.
44055 : */
44056 0 : ae_vector_set_length(&rep->p, npoints, _state);
44057 0 : ae_matrix_set_length(&rep->pm, npoints-1, 6, _state);
44058 0 : ae_matrix_set_length(&cinfo, 2*npoints-1, 4, _state);
44059 0 : for(i=0; i<=npoints-1; i++)
44060 : {
44061 0 : cinfo.ptr.pp_int[i][0] = 1;
44062 0 : cinfo.ptr.pp_int[i][3] = 0;
44063 : }
44064 0 : for(i=0; i<=npoints-2; i++)
44065 : {
44066 0 : cinfo.ptr.pp_int[npoints+i][0] = cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][0]][0]+cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][1]][0];
44067 0 : cinfo.ptr.pp_int[npoints+i][3] = ae_maxint(cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][0]][3], cinfo.ptr.pp_int[rep->z.ptr.pp_int[i][1]][3], _state)+1;
44068 : }
44069 0 : cinfo.ptr.pp_int[2*npoints-2][1] = 0;
44070 0 : cinfo.ptr.pp_int[2*npoints-2][2] = npoints-1;
44071 0 : for(i=npoints-2; i>=0; i--)
44072 : {
44073 :
44074 : /*
44075 : * We merge C0 which spans [A0,B0] and C1 (spans [A1,B1]),
44076 : * with unknown A0, B0, A1, B1. However, we know that result
44077 : * is CR, which spans [AR,BR] with known AR/BR, and we know
44078 : * sizes of C0, C1, CR (denoted as S0, S1, SR).
44079 : */
44080 0 : c0 = rep->z.ptr.pp_int[i][0];
44081 0 : c1 = rep->z.ptr.pp_int[i][1];
44082 0 : s0 = cinfo.ptr.pp_int[c0][0];
44083 0 : s1 = cinfo.ptr.pp_int[c1][0];
44084 0 : ar = cinfo.ptr.pp_int[npoints+i][1];
44085 0 : br = cinfo.ptr.pp_int[npoints+i][2];
44086 0 : cinfo.ptr.pp_int[c0][1] = ar;
44087 0 : cinfo.ptr.pp_int[c0][2] = ar+s0-1;
44088 0 : cinfo.ptr.pp_int[c1][1] = br-(s1-1);
44089 0 : cinfo.ptr.pp_int[c1][2] = br;
44090 0 : rep->pm.ptr.pp_int[i][0] = cinfo.ptr.pp_int[c0][1];
44091 0 : rep->pm.ptr.pp_int[i][1] = cinfo.ptr.pp_int[c0][2];
44092 0 : rep->pm.ptr.pp_int[i][2] = cinfo.ptr.pp_int[c1][1];
44093 0 : rep->pm.ptr.pp_int[i][3] = cinfo.ptr.pp_int[c1][2];
44094 0 : rep->pm.ptr.pp_int[i][4] = cinfo.ptr.pp_int[c0][3];
44095 0 : rep->pm.ptr.pp_int[i][5] = cinfo.ptr.pp_int[c1][3];
44096 : }
44097 0 : for(i=0; i<=npoints-1; i++)
44098 : {
44099 0 : ae_assert(cinfo.ptr.pp_int[i][1]==cinfo.ptr.pp_int[i][2], "Assertion failed", _state);
44100 0 : rep->p.ptr.p_int[i] = cinfo.ptr.pp_int[i][1];
44101 : }
44102 :
44103 : /*
44104 : * Calculate Rep.PZ
44105 : */
44106 0 : ae_matrix_set_length(&rep->pz, npoints-1, 2, _state);
44107 0 : for(i=0; i<=npoints-2; i++)
44108 : {
44109 0 : rep->pz.ptr.pp_int[i][0] = rep->z.ptr.pp_int[i][0];
44110 0 : rep->pz.ptr.pp_int[i][1] = rep->z.ptr.pp_int[i][1];
44111 0 : if( rep->pz.ptr.pp_int[i][0]<npoints )
44112 : {
44113 0 : rep->pz.ptr.pp_int[i][0] = rep->p.ptr.p_int[rep->pz.ptr.pp_int[i][0]];
44114 : }
44115 0 : if( rep->pz.ptr.pp_int[i][1]<npoints )
44116 : {
44117 0 : rep->pz.ptr.pp_int[i][1] = rep->p.ptr.p_int[rep->pz.ptr.pp_int[i][1]];
44118 : }
44119 : }
44120 0 : ae_frame_leave(_state);
44121 : }
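
/*
 * Illustrative sketch (not part of the ALGLIB listing): the five linkage
 * update rules applied above when clusters C0 and C1 are merged, written as
 * one standalone function. dic0/dic1 are distances from cluster I to C0/C1,
 * d01 is the merge distance, n0/n1/ni are cluster sizes. For Ward's method
 * (ahcalgo=4) the inputs are assumed to have already been replaced by
 * 0.5*d^2, as done above. The function name is hypothetical.
 */
static double linkage_update(int ahcalgo, double dic0, double dic1,
    double d01, int n0, int n1, int ni)
{
    switch(ahcalgo)
    {
    case 0:  return dic0>dic1 ? dic0 : dic1;    /* complete linkage */
    case 1:  return dic0<dic1 ? dic0 : dic1;    /* single linkage   */
    case 2:  return (n0*dic0+n1*dic1)/(n0+n1);  /* unweighted average */
    case 3:  return 0.5*(dic0+dic1);            /* weighted average */
    default: /* 4: Ward's method */
        return ((n0+ni)*dic0+(n1+ni)*dic1-ni*d01)/(double)(n0+n1+ni);
    }
}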
44122 :
44123 :
44124 : /*************************************************************************
44125 : This function recursively evaluates distance matrix for SOME (not all!)
44126 : distance types.
44127 :
44128 : INPUT PARAMETERS:
44129 : XY - array[?,NFeatures], dataset
44130 : NFeatures- number of features, >=1
44131 : DistType- distance function:
44132 : * 0 Chebyshev distance (L-inf norm)
44133 : * 1 city block distance (L1 norm)
44134 : D - preallocated output matrix
44135 : I0,I1 - half interval of rows to calculate: [I0,I1) is processed
44136 : J0,J1 - half interval of cols to calculate: [J0,J1) is processed
44137 :
44138 : OUTPUT PARAMETERS:
44139 : D - array[NPoints,NPoints], distance matrix
44140 : upper triangle and main diagonal are initialized with
44141 : data.
44142 :
44143 : NOTE: intersection of [I0,I1) and [J0,J1) may completely lie in upper
44144 : triangle, only partially intersect with it, or have zero intersection.
44145 : In any case, only intersection of submatrix given by [I0,I1)*[J0,J1)
44146 : with upper triangle of the matrix is evaluated.
44147 :
44148 : Say, for 4x4 distance matrix A:
44149 : * [0,2)*[0,2) will result in evaluation of A00, A01, A11
44150 : * [2,4)*[2,4) will result in evaluation of A22, A23, A33
44151 : * [2,4)*[0,2) will result in evaluation of empty set of elements
44152 :
44153 :
44154 : -- ALGLIB --
44155 : Copyright 07.04.2013 by Bochkanov Sergey
44156 : *************************************************************************/
44157 0 : static void clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
44158 : ae_int_t nfeatures,
44159 : ae_int_t disttype,
44160 : /* Real */ ae_matrix* d,
44161 : ae_int_t i0,
44162 : ae_int_t i1,
44163 : ae_int_t j0,
44164 : ae_int_t j1,
44165 : ae_state *_state)
44166 : {
44167 : double rcomplexity;
44168 : ae_int_t len0;
44169 : ae_int_t len1;
44170 : ae_int_t i;
44171 : ae_int_t j;
44172 : ae_int_t k;
44173 : double v;
44174 : double vv;
44175 :
44176 :
44177 0 : ae_assert(disttype==0||disttype==1, "EvaluateDistanceMatrixRec: incorrect DistType", _state);
44178 :
44179 : /*
44180 : * Normalize J0/J1:
44181 : * * J0:=max(J0,I0) - we ignore lower triangle
44182 : * * J1:=max(J1,J0) - normalize J1
44183 : */
44184 0 : j0 = ae_maxint(j0, i0, _state);
44185 0 : j1 = ae_maxint(j1, j0, _state);
44186 0 : if( j1<=j0||i1<=i0 )
44187 : {
44188 0 : return;
44189 : }
44190 0 : rcomplexity = clustering_complexitymultiplier*rmul3((double)(i1-i0), (double)(j1-j0), (double)(nfeatures), _state);
44191 0 : if( (i1-i0>2||j1-j0>2)&&ae_fp_greater_eq(rcomplexity,smpactivationlevel(_state)) )
44192 : {
44193 0 : if( _trypexec_clustering_evaluatedistancematrixrec(xy,nfeatures,disttype,d,i0,i1,j0,j1, _state) )
44194 : {
44195 0 : return;
44196 : }
44197 : }
44198 :
44199 : /*
44200 : * Try to process in parallel. Two conditions must hold in order to
44201 : * activate parallel processing:
44202 : * 1. I1-I0>2 or J1-J0>2
44203 : * 2. (I1-I0)*(J1-J0)*NFeatures>=ParallelComplexity
44204 : *
44205 : * NOTE: all quantities are converted to reals in order to avoid
44206 : * integer overflow during multiplication
44207 : *
44208 : * NOTE: strict inequality in (1) is necessary to reduce task to 2x2
44209 : * basecases. In future versions we will be able to handle such
44210 : * basecases more efficiently than 1x1 cases.
44211 : */
44212 0 : if( ae_fp_greater_eq(rcomplexity,spawnlevel(_state))&&(i1-i0>2||j1-j0>2) )
44213 : {
44214 :
44215 : /*
44216 : * Recursive division along largest of dimensions
44217 : */
44218 0 : if( i1-i0>j1-j0 )
44219 : {
44220 0 : splitlengtheven(i1-i0, &len0, &len1, _state);
44221 0 : clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i0+len0, j0, j1, _state);
44222 0 : clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0+len0, i1, j0, j1, _state);
44223 : }
44224 : else
44225 : {
44226 0 : splitlengtheven(j1-j0, &len0, &len1, _state);
44227 0 : clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i1, j0, j0+len0, _state);
44228 0 : clustering_evaluatedistancematrixrec(xy, nfeatures, disttype, d, i0, i1, j0+len0, j1, _state);
44229 : }
44230 0 : return;
44231 : }
44232 :
44233 : /*
44234 : * Sequential processing
44235 : */
44236 0 : for(i=i0; i<=i1-1; i++)
44237 : {
44238 0 : for(j=j0; j<=j1-1; j++)
44239 : {
44240 0 : if( j>=i )
44241 : {
44242 0 : v = 0.0;
44243 0 : if( disttype==0 )
44244 : {
44245 0 : for(k=0; k<=nfeatures-1; k++)
44246 : {
44247 0 : vv = xy->ptr.pp_double[i][k]-xy->ptr.pp_double[j][k];
44248 0 : if( ae_fp_less(vv,(double)(0)) )
44249 : {
44250 0 : vv = -vv;
44251 : }
44252 0 : if( ae_fp_greater(vv,v) )
44253 : {
44254 0 : v = vv;
44255 : }
44256 : }
44257 : }
44258 0 : if( disttype==1 )
44259 : {
44260 0 : for(k=0; k<=nfeatures-1; k++)
44261 : {
44262 0 : vv = xy->ptr.pp_double[i][k]-xy->ptr.pp_double[j][k];
44263 0 : if( ae_fp_less(vv,(double)(0)) )
44264 : {
44265 0 : vv = -vv;
44266 : }
44267 0 : v = v+vv;
44268 : }
44269 : }
44270 0 : d->ptr.pp_double[i][j] = v;
44271 : }
44272 : }
44273 : }
44274 : }
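
/*
 * Illustrative sketch (not part of the ALGLIB listing): which cells of a
 * requested [I0,I1)x[J0,J1) block are actually evaluated after the
 * J0:=max(J0,I0) normalization above - only cells with J>=I, i.e. the
 * intersection with the upper triangle plus the main diagonal. For the 4x4
 * example in the comments, count_evaluated(2,4,2,4) returns 3 (A22, A23,
 * A33) and count_evaluated(2,4,0,2) returns 0. The helper is hypothetical.
 */
static int count_evaluated(int i0, int i1, int j0, int j1)
{
    int i, j, cnt = 0;
    if( j0<i0 ) j0 = i0;  /* ignore lower triangle */
    if( j1<j0 ) j1 = j0;  /* normalize J1 */
    for(i=i0; i<i1; i++)
        for(j=j0; j<j1; j++)
            if( j>=i )
                cnt++;
    return cnt;
}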
44275 :
44276 :
44277 : /*************************************************************************
44278 : Serial stub for GPL edition.
44279 : *************************************************************************/
44280 0 : ae_bool _trypexec_clustering_evaluatedistancematrixrec(/* Real */ ae_matrix* xy,
44281 : ae_int_t nfeatures,
44282 : ae_int_t disttype,
44283 : /* Real */ ae_matrix* d,
44284 : ae_int_t i0,
44285 : ae_int_t i1,
44286 : ae_int_t j0,
44287 : ae_int_t j1,
44288 : ae_state *_state)
44289 : {
44290 0 : return ae_false;
44291 : }
44292 :
44293 :
44294 0 : void _kmeansbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
44295 : {
44296 0 : kmeansbuffers *p = (kmeansbuffers*)_p;
44297 0 : ae_touch_ptr((void*)p);
44298 0 : ae_matrix_init(&p->ct, 0, 0, DT_REAL, _state, make_automatic);
44299 0 : ae_matrix_init(&p->ctbest, 0, 0, DT_REAL, _state, make_automatic);
44300 0 : ae_vector_init(&p->xycbest, 0, DT_INT, _state, make_automatic);
44301 0 : ae_vector_init(&p->xycprev, 0, DT_INT, _state, make_automatic);
44302 0 : ae_vector_init(&p->d2, 0, DT_REAL, _state, make_automatic);
44303 0 : ae_vector_init(&p->csizes, 0, DT_INT, _state, make_automatic);
44304 0 : _apbuffers_init(&p->initbuf, _state, make_automatic);
44305 0 : ae_shared_pool_init(&p->updatepool, _state, make_automatic);
44306 0 : }
44307 :
44308 :
44309 0 : void _kmeansbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
44310 : {
44311 0 : kmeansbuffers *dst = (kmeansbuffers*)_dst;
44312 0 : kmeansbuffers *src = (kmeansbuffers*)_src;
44313 0 : ae_matrix_init_copy(&dst->ct, &src->ct, _state, make_automatic);
44314 0 : ae_matrix_init_copy(&dst->ctbest, &src->ctbest, _state, make_automatic);
44315 0 : ae_vector_init_copy(&dst->xycbest, &src->xycbest, _state, make_automatic);
44316 0 : ae_vector_init_copy(&dst->xycprev, &src->xycprev, _state, make_automatic);
44317 0 : ae_vector_init_copy(&dst->d2, &src->d2, _state, make_automatic);
44318 0 : ae_vector_init_copy(&dst->csizes, &src->csizes, _state, make_automatic);
44319 0 : _apbuffers_init_copy(&dst->initbuf, &src->initbuf, _state, make_automatic);
44320 0 : ae_shared_pool_init_copy(&dst->updatepool, &src->updatepool, _state, make_automatic);
44321 0 : }
44322 :
44323 :
44324 0 : void _kmeansbuffers_clear(void* _p)
44325 : {
44326 0 : kmeansbuffers *p = (kmeansbuffers*)_p;
44327 0 : ae_touch_ptr((void*)p);
44328 0 : ae_matrix_clear(&p->ct);
44329 0 : ae_matrix_clear(&p->ctbest);
44330 0 : ae_vector_clear(&p->xycbest);
44331 0 : ae_vector_clear(&p->xycprev);
44332 0 : ae_vector_clear(&p->d2);
44333 0 : ae_vector_clear(&p->csizes);
44334 0 : _apbuffers_clear(&p->initbuf);
44335 0 : ae_shared_pool_clear(&p->updatepool);
44336 0 : }
44337 :
44338 :
44339 0 : void _kmeansbuffers_destroy(void* _p)
44340 : {
44341 0 : kmeansbuffers *p = (kmeansbuffers*)_p;
44342 0 : ae_touch_ptr((void*)p);
44343 0 : ae_matrix_destroy(&p->ct);
44344 0 : ae_matrix_destroy(&p->ctbest);
44345 0 : ae_vector_destroy(&p->xycbest);
44346 0 : ae_vector_destroy(&p->xycprev);
44347 0 : ae_vector_destroy(&p->d2);
44348 0 : ae_vector_destroy(&p->csizes);
44349 0 : _apbuffers_destroy(&p->initbuf);
44350 0 : ae_shared_pool_destroy(&p->updatepool);
44351 0 : }
44352 :
44353 :
44354 0 : void _clusterizerstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
44355 : {
44356 0 : clusterizerstate *p = (clusterizerstate*)_p;
44357 0 : ae_touch_ptr((void*)p);
44358 0 : ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
44359 0 : ae_matrix_init(&p->d, 0, 0, DT_REAL, _state, make_automatic);
44360 0 : ae_matrix_init(&p->tmpd, 0, 0, DT_REAL, _state, make_automatic);
44361 0 : _apbuffers_init(&p->distbuf, _state, make_automatic);
44362 0 : _kmeansbuffers_init(&p->kmeanstmp, _state, make_automatic);
44363 0 : }
44364 :
44365 :
44366 0 : void _clusterizerstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
44367 : {
44368 0 : clusterizerstate *dst = (clusterizerstate*)_dst;
44369 0 : clusterizerstate *src = (clusterizerstate*)_src;
44370 0 : dst->npoints = src->npoints;
44371 0 : dst->nfeatures = src->nfeatures;
44372 0 : dst->disttype = src->disttype;
44373 0 : ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
44374 0 : ae_matrix_init_copy(&dst->d, &src->d, _state, make_automatic);
44375 0 : dst->ahcalgo = src->ahcalgo;
44376 0 : dst->kmeansrestarts = src->kmeansrestarts;
44377 0 : dst->kmeansmaxits = src->kmeansmaxits;
44378 0 : dst->kmeansinitalgo = src->kmeansinitalgo;
44379 0 : dst->kmeansdbgnoits = src->kmeansdbgnoits;
44380 0 : dst->seed = src->seed;
44381 0 : ae_matrix_init_copy(&dst->tmpd, &src->tmpd, _state, make_automatic);
44382 0 : _apbuffers_init_copy(&dst->distbuf, &src->distbuf, _state, make_automatic);
44383 0 : _kmeansbuffers_init_copy(&dst->kmeanstmp, &src->kmeanstmp, _state, make_automatic);
44384 0 : }
44385 :
44386 :
44387 0 : void _clusterizerstate_clear(void* _p)
44388 : {
44389 0 : clusterizerstate *p = (clusterizerstate*)_p;
44390 0 : ae_touch_ptr((void*)p);
44391 0 : ae_matrix_clear(&p->xy);
44392 0 : ae_matrix_clear(&p->d);
44393 0 : ae_matrix_clear(&p->tmpd);
44394 0 : _apbuffers_clear(&p->distbuf);
44395 0 : _kmeansbuffers_clear(&p->kmeanstmp);
44396 0 : }
44397 :
44398 :
44399 0 : void _clusterizerstate_destroy(void* _p)
44400 : {
44401 0 : clusterizerstate *p = (clusterizerstate*)_p;
44402 0 : ae_touch_ptr((void*)p);
44403 0 : ae_matrix_destroy(&p->xy);
44404 0 : ae_matrix_destroy(&p->d);
44405 0 : ae_matrix_destroy(&p->tmpd);
44406 0 : _apbuffers_destroy(&p->distbuf);
44407 0 : _kmeansbuffers_destroy(&p->kmeanstmp);
44408 0 : }
44409 :
44410 :
44411 0 : void _ahcreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
44412 : {
44413 0 : ahcreport *p = (ahcreport*)_p;
44414 0 : ae_touch_ptr((void*)p);
44415 0 : ae_vector_init(&p->p, 0, DT_INT, _state, make_automatic);
44416 0 : ae_matrix_init(&p->z, 0, 0, DT_INT, _state, make_automatic);
44417 0 : ae_matrix_init(&p->pz, 0, 0, DT_INT, _state, make_automatic);
44418 0 : ae_matrix_init(&p->pm, 0, 0, DT_INT, _state, make_automatic);
44419 0 : ae_vector_init(&p->mergedist, 0, DT_REAL, _state, make_automatic);
44420 0 : }
44421 :
44422 :
44423 0 : void _ahcreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
44424 : {
44425 0 : ahcreport *dst = (ahcreport*)_dst;
44426 0 : ahcreport *src = (ahcreport*)_src;
44427 0 : dst->terminationtype = src->terminationtype;
44428 0 : dst->npoints = src->npoints;
44429 0 : ae_vector_init_copy(&dst->p, &src->p, _state, make_automatic);
44430 0 : ae_matrix_init_copy(&dst->z, &src->z, _state, make_automatic);
44431 0 : ae_matrix_init_copy(&dst->pz, &src->pz, _state, make_automatic);
44432 0 : ae_matrix_init_copy(&dst->pm, &src->pm, _state, make_automatic);
44433 0 : ae_vector_init_copy(&dst->mergedist, &src->mergedist, _state, make_automatic);
44434 0 : }
44435 :
44436 :
44437 0 : void _ahcreport_clear(void* _p)
44438 : {
44439 0 : ahcreport *p = (ahcreport*)_p;
44440 0 : ae_touch_ptr((void*)p);
44441 0 : ae_vector_clear(&p->p);
44442 0 : ae_matrix_clear(&p->z);
44443 0 : ae_matrix_clear(&p->pz);
44444 0 : ae_matrix_clear(&p->pm);
44445 0 : ae_vector_clear(&p->mergedist);
44446 0 : }
44447 :
44448 :
44449 0 : void _ahcreport_destroy(void* _p)
44450 : {
44451 0 : ahcreport *p = (ahcreport*)_p;
44452 0 : ae_touch_ptr((void*)p);
44453 0 : ae_vector_destroy(&p->p);
44454 0 : ae_matrix_destroy(&p->z);
44455 0 : ae_matrix_destroy(&p->pz);
44456 0 : ae_matrix_destroy(&p->pm);
44457 0 : ae_vector_destroy(&p->mergedist);
44458 0 : }
44459 :
44460 :
44461 0 : void _kmeansreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
44462 : {
44463 0 : kmeansreport *p = (kmeansreport*)_p;
44464 0 : ae_touch_ptr((void*)p);
44465 0 : ae_matrix_init(&p->c, 0, 0, DT_REAL, _state, make_automatic);
44466 0 : ae_vector_init(&p->cidx, 0, DT_INT, _state, make_automatic);
44467 0 : }
44468 :
44469 :
44470 0 : void _kmeansreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
44471 : {
44472 0 : kmeansreport *dst = (kmeansreport*)_dst;
44473 0 : kmeansreport *src = (kmeansreport*)_src;
44474 0 : dst->npoints = src->npoints;
44475 0 : dst->nfeatures = src->nfeatures;
44476 0 : dst->terminationtype = src->terminationtype;
44477 0 : dst->iterationscount = src->iterationscount;
44478 0 : dst->energy = src->energy;
44479 0 : dst->k = src->k;
44480 0 : ae_matrix_init_copy(&dst->c, &src->c, _state, make_automatic);
44481 0 : ae_vector_init_copy(&dst->cidx, &src->cidx, _state, make_automatic);
44482 0 : }
44483 :
44484 :
44485 0 : void _kmeansreport_clear(void* _p)
44486 : {
44487 0 : kmeansreport *p = (kmeansreport*)_p;
44488 0 : ae_touch_ptr((void*)p);
44489 0 : ae_matrix_clear(&p->c);
44490 0 : ae_vector_clear(&p->cidx);
44491 0 : }
44492 :
44493 :
44494 0 : void _kmeansreport_destroy(void* _p)
44495 : {
44496 0 : kmeansreport *p = (kmeansreport*)_p;
44497 0 : ae_touch_ptr((void*)p);
44498 0 : ae_matrix_destroy(&p->c);
44499 0 : ae_vector_destroy(&p->cidx);
44500 0 : }
44501 :
44502 :
44503 : #endif
44504 : #if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
44505 :
44506 :
44507 : /*************************************************************************
44508 : This function creates buffer structure which can be used to perform
44509 : parallel inference requests.
44510 :
44511 : DF subpackage provides two sets of computing functions - ones which use
44512 : internal buffer of DF model (these functions are single-threaded because
44513 : internal buffer of the DF model (these functions are single-threaded because
44514 : they use the same buffer, which cannot be shared between threads), and ones
44515 : which use an external buffer.
44516 : This function is used to initialize external buffer.
44517 :
44518 : INPUT PARAMETERS
44519 : Model - DF model which is associated with newly created buffer
44520 :
44521 : OUTPUT PARAMETERS
44522 : Buf - external buffer.
44523 :
44524 :
44525 : IMPORTANT: the buffer object should be used only with the model which was
44526 : used to initialize it. Any attempt to use the buffer with a
44527 : different model is dangerous - you may get an integrity check
44528 : failure (exception) because sizes of internal arrays do not fit
44529 : the dimensions of the model structure.
44530 :
44531 : -- ALGLIB --
44532 : Copyright 15.02.2019 by Bochkanov Sergey
44533 : *************************************************************************/
44534 0 : void dfcreatebuffer(decisionforest* model,
44535 : decisionforestbuffer* buf,
44536 : ae_state *_state)
44537 : {
44538 :
44539 0 : _decisionforestbuffer_clear(buf);
44540 :
44541 0 : ae_vector_set_length(&buf->x, model->nvars, _state);
44542 0 : ae_vector_set_length(&buf->y, model->nclasses, _state);
44543 0 : }
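
/*
 * Illustrative usage sketch (not part of the ALGLIB listing), assuming the
 * C++ interface exposes dfcreatebuffer() together with a buffered
 * thread-safe inference call (dftsprocess() in recent ALGLIB versions):
 * each worker thread owns its own buffer, so no state is shared.
 */
void infer_with_private_buffer(const alglib::decisionforest &model,
    const alglib::real_1d_array &x, alglib::real_1d_array &y)
{
    alglib::decisionforestbuffer buf;
    alglib::dfcreatebuffer(model, buf);    // one buffer per thread
    alglib::dftsprocess(model, buf, x, y); // reads model, writes only buf/y
}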
44544 :
44545 :
44546 : /*************************************************************************
44547 : This subroutine creates DecisionForestBuilder object which is used to
44548 : train decision forests.
44549 :
44550 : By default, a new builder stores an empty dataset and some reasonable default
44551 : settings. At the very least, you should specify the dataset prior to building
44552 : a decision forest. You can also tweak settings of the forest construction
44553 : algorithm (recommended, although the default settings should work well).
44554 :
44555 : The following actions are mandatory:
44556 : * calling dfbuildersetdataset() to specify dataset
44557 : * calling dfbuilderbuildrandomforest() to build decision forest using
44558 : current dataset and default settings
44559 :
44560 : Additionally, you may call:
44561 : * dfbuildersetrndvars() or dfbuildersetrndvarsratio() to specify number of
44562 : variables randomly chosen for each split
44563 : * dfbuildersetsubsampleratio() to specify fraction of the dataset randomly
44564 : subsampled to build each tree
44565 : * dfbuildersetseed() to control random seed chosen for tree construction
44566 :
44567 : INPUT PARAMETERS:
44568 : none
44569 :
44570 : OUTPUT PARAMETERS:
44571 : S - decision forest builder
44572 :
44573 : -- ALGLIB --
44574 : Copyright 21.05.2018 by Bochkanov Sergey
44575 : *************************************************************************/
44576 0 : void dfbuildercreate(decisionforestbuilder* s, ae_state *_state)
44577 : {
44578 :
44579 0 : _decisionforestbuilder_clear(s);
44580 :
44581 :
44582 : /*
44583 : * Empty dataset
44584 : */
44585 0 : s->dstype = -1;
44586 0 : s->npoints = 0;
44587 0 : s->nvars = 0;
44588 0 : s->nclasses = 1;
44589 :
44590 : /*
44591 : * Default training settings
44592 : */
44593 0 : s->rdfalgo = 0;
44594 0 : s->rdfratio = 0.5;
44595 0 : s->rdfvars = 0.0;
44596 0 : s->rdfglobalseed = 0;
44597 0 : s->rdfsplitstrength = 2;
44598 0 : s->rdfimportance = 0;
44599 :
44600 : /*
44601 : * Other fields
44602 : */
44603 0 : s->rdfprogress = 0;
44604 0 : s->rdftotal = 1;
44605 0 : }
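
/*
 * Illustrative usage sketch (not part of the ALGLIB listing): the minimal
 * mandatory workflow described above, via the C++ interface. The tree count
 * (50) and the fixed seed are arbitrary choices for the example, and
 * dfbuilderbuildrandomforest()/dfreport are assumed to be available in the
 * C++ wrapper as in recent ALGLIB versions.
 */
void build_forest_example(const alglib::real_2d_array &xy,
    alglib::ae_int_t npoints, alglib::ae_int_t nvars,
    alglib::ae_int_t nclasses, alglib::decisionforest &df)
{
    alglib::decisionforestbuilder builder;
    alglib::dfreport rep;
    alglib::dfbuildercreate(builder);                 // mandatory
    alglib::dfbuildersetdataset(builder, xy, npoints, nvars, nclasses);
    alglib::dfbuildersetseed(builder, 1);             // optional: reproducibility
    alglib::dfbuilderbuildrandomforest(builder, 50, df, rep);
}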
44606 :
44607 :
44608 : /*************************************************************************
44609 : This subroutine adds dense dataset to the internal storage of the builder
44610 : object. Specifying your dataset in the dense format means that the dense
44611 : version of the forest construction algorithm will be invoked.
44612 :
44613 : INPUT PARAMETERS:
44614 : S - decision forest builder object
44615 : XY - array[NPoints,NVars+1] (minimum size; actual size can
44616 : be larger, only leading part is used anyway), dataset:
44617 : * first NVars elements of each row store values of the
44618 : independent variables
44619 : * last column stores class number (in 0...NClasses-1)
44620 : or real value of the dependent variable
44621 : NPoints - number of rows in the dataset, NPoints>=1
44622 : NVars - number of independent variables, NVars>=1
44623 : NClasses - indicates type of the problem being solved:
44624 : * NClasses>=2 means that classification problem is
44625 : solved (last column of the dataset stores class
44626 : number)
44627 : * NClasses=1 means that regression problem is solved
44628 : (last column of the dataset stores variable value)
44629 :
44630 : OUTPUT PARAMETERS:
44631 : S - decision forest builder
44632 :
44633 : -- ALGLIB --
44634 : Copyright 21.05.2018 by Bochkanov Sergey
44635 : *************************************************************************/
44636 0 : void dfbuildersetdataset(decisionforestbuilder* s,
44637 : /* Real */ ae_matrix* xy,
44638 : ae_int_t npoints,
44639 : ae_int_t nvars,
44640 : ae_int_t nclasses,
44641 : ae_state *_state)
44642 : {
44643 : ae_int_t i;
44644 : ae_int_t j;
44645 :
44646 :
44647 :
44648 : /*
44649 : * Check parameters
44650 : */
44651 0 : ae_assert(npoints>=1, "dfbuildersetdataset: npoints<1", _state);
44652 0 : ae_assert(nvars>=1, "dfbuildersetdataset: nvars<1", _state);
44653 0 : ae_assert(nclasses>=1, "dfbuildersetdataset: nclasses<1", _state);
44654 0 : ae_assert(xy->rows>=npoints, "dfbuildersetdataset: rows(xy)<npoints", _state);
44655 0 : ae_assert(xy->cols>=nvars+1, "dfbuildersetdataset: cols(xy)<nvars+1", _state);
44656 0 : ae_assert(apservisfinitematrix(xy, npoints, nvars+1, _state), "dfbuildersetdataset: xy parameter contains INFs or NANs", _state);
44657 0 : if( nclasses>1 )
44658 : {
44659 0 : for(i=0; i<=npoints-1; i++)
44660 : {
44661 0 : j = ae_round(xy->ptr.pp_double[i][nvars], _state);
44662 0 : ae_assert(j>=0&&j<nclasses, "dfbuildersetdataset: last column of xy contains invalid class number", _state);
44663 : }
44664 : }
44665 :
44666 : /*
44667 : * Set dataset
44668 : */
44669 0 : s->dstype = 0;
44670 0 : s->npoints = npoints;
44671 0 : s->nvars = nvars;
44672 0 : s->nclasses = nclasses;
44673 0 : rvectorsetlengthatleast(&s->dsdata, npoints*nvars, _state);
44674 0 : for(i=0; i<=npoints-1; i++)
44675 : {
44676 0 : for(j=0; j<=nvars-1; j++)
44677 : {
44678 0 : s->dsdata.ptr.p_double[j*npoints+i] = xy->ptr.pp_double[i][j];
44679 : }
44680 : }
44681 0 : if( nclasses>1 )
44682 : {
44683 0 : ivectorsetlengthatleast(&s->dsival, npoints, _state);
44684 0 : for(i=0; i<=npoints-1; i++)
44685 : {
44686 0 : s->dsival.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
44687 : }
44688 : }
44689 : else
44690 : {
44691 0 : rvectorsetlengthatleast(&s->dsrval, npoints, _state);
44692 0 : for(i=0; i<=npoints-1; i++)
44693 : {
44694 0 : s->dsrval.ptr.p_double[i] = xy->ptr.pp_double[i][nvars];
44695 : }
44696 : }
44697 0 : }
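
/*
 * Illustrative sketch (not part of the ALGLIB listing): the function above
 * stores the dataset column-by-column, DSData[J*NPoints+I] holding variable
 * J of point I, so a split pass over one variable scans contiguous memory.
 * The accessor below is hypothetical.
 */
static double ds_get(const double *dsdata, int npoints, int i, int j)
{
    return dsdata[j*npoints+i]; /* column-major: variable j, point i */
}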
44698 :
44699 :
44700 : /*************************************************************************
44701 : This function sets number of variables (in [1,NVars] range) used by
44702 : decision forest construction algorithm.
44703 :
44704 : The default option is to use roughly sqrt(NVars) variables.
44705 :
44706 : INPUT PARAMETERS:
44707 : S - decision forest builder object
44708 : RndVars - number of randomly selected variables; values outside
44709 : of [1,NVars] range are silently clipped.
44710 :
44711 : OUTPUT PARAMETERS:
44712 : S - decision forest builder
44713 :
44714 : -- ALGLIB --
44715 : Copyright 21.05.2018 by Bochkanov Sergey
44716 : *************************************************************************/
44717 0 : void dfbuildersetrndvars(decisionforestbuilder* s,
44718 : ae_int_t rndvars,
44719 : ae_state *_state)
44720 : {
44721 :
44722 :
44723 0 : s->rdfvars = (double)(ae_maxint(rndvars, 1, _state));
44724 0 : }
44725 :
44726 :
44727 : /*************************************************************************
44728 : This function sets number of variables used by decision forest construction
44729 : algorithm as a fraction of total variable count, in (0,1) range.
44730 :
44731 : The default option is to use roughly sqrt(NVars) variables.
44732 :
44733 : INPUT PARAMETERS:
44734 : S - decision forest builder object
44735 : F - round(NVars*F) variables are selected
44736 :
44737 : OUTPUT PARAMETERS:
44738 : S - decision forest builder
44739 :
44740 : -- ALGLIB --
44741 : Copyright 21.05.2018 by Bochkanov Sergey
44742 : *************************************************************************/
44743 0 : void dfbuildersetrndvarsratio(decisionforestbuilder* s,
44744 : double f,
44745 : ae_state *_state)
44746 : {
44747 :
44748 :
44749 0 : ae_assert(ae_isfinite(f, _state), "dfbuildersetrndvarsratio: F is INF or NAN", _state);
44750 0 : s->rdfvars = -ae_maxreal(f, ae_machineepsilon, _state);
44751 0 : }
44752 :
44753 :
44754 : /*************************************************************************
44755 : This function tells decision forest builder to automatically choose number
44756 : of variables used by decision forest construction algorithm. Roughly
44757 : sqrt(NVars) variables will be used.
44758 :
44759 : INPUT PARAMETERS:
44760 : S - decision forest builder object
44761 :
44762 : OUTPUT PARAMETERS:
44763 : S - decision forest builder
44764 :
44765 : -- ALGLIB --
44766 : Copyright 21.05.2018 by Bochkanov Sergey
44767 : *************************************************************************/
44768 0 : void dfbuildersetrndvarsauto(decisionforestbuilder* s, ae_state *_state)
44769 : {
44770 :
44771 :
44772 0 : s->rdfvars = (double)(0);
44773 0 : }
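
/*
 * Illustrative sketch (not part of the ALGLIB listing): the three setters
 * above share one field, RDFVars - positive values store an explicit
 * variable count, negative values store a fraction of NVars (negated),
 * and zero requests the automatic sqrt(NVars) default. The decoder below,
 * including its rounding and the [1,NVars] clipping, is a hypothetical
 * restatement of that convention, not the library's exact code.
 */
#include <math.h>

static int decode_rndvars(double rdfvars, int nvars)
{
    int v;
    if( rdfvars>0 )
        v = (int)rdfvars;                        /* explicit count */
    else if( rdfvars<0 )
        v = (int)floor(-rdfvars*nvars+0.5);      /* fraction of NVars */
    else
        v = (int)floor(sqrt((double)nvars)+0.5); /* automatic default */
    if( v<1 )
        v = 1;
    if( v>nvars )
        v = nvars;
    return v;
}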
44774 :
44775 :
44776 : /*************************************************************************
44777 : This function sets size of dataset subsample generated by the decision forest
44778 : construction algorithm. Size is specified as a fraction of total dataset
44779 : size.
44780 :
44781 : The default option is to use 50% of the dataset for training, 50% for the
44782 : OOB estimates. You can decrease fraction F down to 10%, 1% or even below
44783 : in order to reduce overfitting.
44784 :
44785 : INPUT PARAMETERS:
44786 : S - decision forest builder object
44787 : F - fraction of the dataset to use, in (0,1] range. Values
44788 : outside of this range will be silently clipped. At
44789 : least one element is always selected for the training
44790 : set.
44791 :
44792 : OUTPUT PARAMETERS:
44793 : S - decision forest builder
44794 :
44795 : -- ALGLIB --
44796 : Copyright 21.05.2018 by Bochkanov Sergey
44797 : *************************************************************************/
44798 0 : void dfbuildersetsubsampleratio(decisionforestbuilder* s,
44799 : double f,
44800 : ae_state *_state)
44801 : {
44802 :
44803 :
44804 0 : ae_assert(ae_isfinite(f, _state), "dfbuildersetsubsampleratio: F is INF or NAN", _state);
44805 0 : s->rdfratio = ae_maxreal(f, ae_machineepsilon, _state);
44806 0 : }
44807 :
44808 :
44809 : /*************************************************************************
44810 : This function sets seed used by internal RNG for random subsampling and
44811 : random selection of variable subsets.
44812 :
44813 : By default a random seed is used, i.e. every time you build a decision forest,
44814 : we seed the generator with a new value obtained from the system-wide RNG. Thus,
44815 : the decision forest builder returns non-deterministic results. You can change
44816 : such behavior by specifying a fixed positive seed value.
44817 :
44818 : INPUT PARAMETERS:
44819 : S - decision forest builder object
44820 : SeedVal - seed value:
44821 : * positive values are used for seeding RNG with fixed
44822 : seed, i.e. subsequent runs on same data will return
44823 : same decision forests
44824 : * non-positive seed means that random seed is used
44825 : for every run of builder, i.e. subsequent runs on
44826 : same datasets will return slightly different
44827 : decision forests
44828 :
44829 : OUTPUT PARAMETERS:
44830 : S - decision forest builder
44831 :
44832 : -- ALGLIB --
44833 : Copyright 21.05.2018 by Bochkanov Sergey
44834 : *************************************************************************/
44835 0 : void dfbuildersetseed(decisionforestbuilder* s,
44836 : ae_int_t seedval,
44837 : ae_state *_state)
44838 : {
44839 :
44840 :
44841 0 : s->rdfglobalseed = seedval;
44842 0 : }
44843 :
44844 :
44845 : /*************************************************************************
44846 : This function sets random decision forest construction algorithm.
44847 :
44848 : As of now, only one decision forest construction algorithm is supported -
44849 : a dense "baseline" RDF algorithm.
44850 :
44851 : INPUT PARAMETERS:
44852 : S - decision forest builder object
44853 : AlgoType - algorithm type:
44854 : * 0 = baseline dense RDF
44855 :
44856 : OUTPUT PARAMETERS:
44857 : S - decision forest builder
44858 :
44859 : -- ALGLIB --
44860 : Copyright 21.05.2018 by Bochkanov Sergey
44861 : *************************************************************************/
44862 0 : void dfbuildersetrdfalgo(decisionforestbuilder* s,
44863 : ae_int_t algotype,
44864 : ae_state *_state)
44865 : {
44866 :
44867 :
44868 0 : ae_assert(algotype==0, "dfbuildersetrdfalgo: unexpected algotype", _state);
44869 0 : s->rdfalgo = algotype;
44870 0 : }
44871 :
44872 :
44873 : /*************************************************************************
44874 : This function sets split selection algorithm used by decision forest
44875 : classifier. You may choose among several algorithms, with different speed and
44876 : quality of the results.
44877 :
44878 : INPUT PARAMETERS:
44879 : S - decision forest builder object
44880 : SplitStrength- split type:
44881 : * 0 = split at the random position, fastest one
44882 : * 1 = split at the middle of the range
44883 : * 2 = strong split at the best point of the range (default)
44884 :
44885 : OUTPUT PARAMETERS:
44886 : S - decision forest builder
44887 :
44888 : -- ALGLIB --
44889 : Copyright 21.05.2018 by Bochkanov Sergey
44890 : *************************************************************************/
44891 0 : void dfbuildersetrdfsplitstrength(decisionforestbuilder* s,
44892 : ae_int_t splitstrength,
44893 : ae_state *_state)
44894 : {
44895 :
44896 :
44897 0 : ae_assert((splitstrength==0||splitstrength==1)||splitstrength==2, "dfbuildersetrdfsplitstrength: unexpected split type", _state);
44898 0 : s->rdfsplitstrength = splitstrength;
44899 0 : }
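
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Selecting the construction algorithm and the split strategy; both
setters are defined above, the helper name is hypothetical.
*************************************************************************/
static void example_split_setup(decisionforestbuilder* s, ae_state *_state)
{
    dfbuildersetrdfalgo(s, 0, _state);          /* 0 = baseline dense RDF */
    dfbuildersetrdfsplitstrength(s, 2, _state); /* 2 = strong split at the
                                                   best point (the default);
                                                   0 and 1 trade quality
                                                   for speed              */
}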
44900 :
44901 :
44902 : /*************************************************************************
44903 : This function tells the decision forest construction algorithm to use
44904 : Gini impurity based variable importance estimation (also known as MDI).
44905 :
44906 : This version of the importance estimation algorithm analyzes the mean
44907 : decrease in impurity (MDI) on the training sample during splits. The result
44908 : is divided by the impurity at the root node to produce an estimate in the [0,1] range.
44909 :
44910 : Such estimates are fast to calculate and beautifully normalized (they sum
44911 : to one) but have the following downsides:
44912 : * they ALWAYS sum to 1.0, even if the output is completely unpredictable,
44913 : i.e. MDI allows us to order variables by importance, but does not tell
44914 : us about the "absolute" importance of each variable
44915 : * there exists some bias towards continuous and high-cardinality
44916 : categorical variables
44917 :
44918 : NOTE: informally speaking, MDA (permutation importance) rating answers the
44919 : question "what part of the model predictive power is ruined by
44920 : permuting k-th variable?" while MDI tells us "what part of the model
44921 : predictive power was achieved due to usage of k-th variable".
44922 :
44923 : Thus, MDA rates each variable independently at "0 to 1" scale while
44924 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
44925 : between several important variables.
44926 :
44927 : If all variables are equally important, they will have the same
44928 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
44929 : However, roughly the same picture will be produced for the "all
44930 : variables provide information, no single one is critical" situation and
44931 : for the "all variables are critical, drop any one and everything is
44932 : ruined" situation.
44933 :
44934 : Contrary to that, MDA will rate a critical variable at ~1.0, while an
44935 : important but non-critical variable will receive a less-than-unit
44936 : rating.
44937 :
44938 : NOTE: quite often MDA and MDI return the same results. This generally
44939 : happens on problems with low test set error (a few percent at most)
44940 : and a training set large enough to avoid overfitting.
44941 :
44942 : The difference between MDA, MDI and OOB-MDI becomes important only
44943 : on "hard" tasks with high test set error and/or small training set.
44944 :
44945 : INPUT PARAMETERS:
44946 : S - decision forest builder object
44947 :
44948 : OUTPUT PARAMETERS:
44949 : S - decision forest builder object. Next call to the forest
44950 : construction function will produce:
44951 : * importance estimates in rep.varimportances field
44952 : * variable ranks in rep.topvars field
44953 :
44954 : -- ALGLIB --
44955 : Copyright 29.07.2019 by Bochkanov Sergey
44956 : *************************************************************************/
44957 0 : void dfbuildersetimportancetrngini(decisionforestbuilder* s,
44958 : ae_state *_state)
44959 : {
44960 :
44961 :
44962 0 : s->rdfimportance = dforest_needtrngini;
44963 0 : }
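
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Requesting MDI importances and reading the resulting ratings, assuming a
builder already loaded with a dataset. Field names follow the report
description in this section (rep.varimportances, rep.topvars); the
helper name is hypothetical.
*************************************************************************/
static void example_mdi_importance(decisionforestbuilder* s,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_int_t best;
    double rating;

    dfbuildersetimportancetrngini(s, _state);        /* request MDI        */
    dfbuilderbuildrandomforest(s, 100, df, rep, _state);
    best = rep->topvars.ptr.p_int[0];                /* most important var */
    rating = rep->varimportances.ptr.p_double[best]; /* its MDI rating; all
                                                        ratings sum to ~1  */
}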
44964 :
44965 :
44966 : /*************************************************************************
44967 : This function tells the decision forest construction algorithm to use the
44968 : out-of-bag version of Gini variable importance estimation (also known as
44969 : OOB-MDI).
44970 :
44971 : This version of the importance estimation algorithm analyzes the mean
44972 : decrease in impurity (MDI) on the out-of-bag sample during splits. The result
44973 : is divided by the impurity at the root node to produce an estimate in the [0,1] range.
44974 :
44975 : Such estimates are fast to calculate and resistant to overfitting issues
44976 : (thanks to the out-of-bag estimates used). However, the OOB Gini rating has
44977 : the following downsides:
44978 : * there exists some bias towards continuous and high-cardinality
44979 : categorical variables
44980 : * the Gini rating allows us to order variables by importance, but it is
44981 : hard to define the importance of a variable by itself.
44982 :
44983 : NOTE: informally speaking, MDA (permutation importance) rating answers the
44984 : question "what part of the model predictive power is ruined by
44985 : permuting k-th variable?" while MDI tells us "what part of the model
44986 : predictive power was achieved due to usage of k-th variable".
44987 :
44988 : Thus, MDA rates each variable independently at "0 to 1" scale while
44989 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
44990 : between several important variables.
44991 :
44992 : If all variables are equally important, they will have the same
44993 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
44994 : However, roughly the same picture will be produced for the "all
44995 : variables provide information, no single one is critical" situation and
44996 : for the "all variables are critical, drop any one and everything is
44997 : ruined" situation.
44998 :
44999 : Contrary to that, MDA will rate a critical variable at ~1.0, while an
45000 : important but non-critical variable will receive a less-than-unit
45001 : rating.
45002 :
45003 : NOTE: quite often MDA and MDI return the same results. This generally
45004 : happens on problems with low test set error (a few percent at most)
45005 : and a training set large enough to avoid overfitting.
45006 :
45007 : The difference between MDA, MDI and OOB-MDI becomes important only
45008 : on "hard" tasks with high test set error and/or small training set.
45009 :
45010 : INPUT PARAMETERS:
45011 : S - decision forest builder object
45012 :
45013 : OUTPUT PARAMETERS:
45014 : S - decision forest builder object. Next call to the forest
45015 : construction function will produce:
45016 : * importance estimates in rep.varimportances field
45017 : * variable ranks in rep.topvars field
45018 :
45019 : -- ALGLIB --
45020 : Copyright 29.07.2019 by Bochkanov Sergey
45021 : *************************************************************************/
45022 0 : void dfbuildersetimportanceoobgini(decisionforestbuilder* s,
45023 : ae_state *_state)
45024 : {
45025 :
45026 :
45027 0 : s->rdfimportance = dforest_needoobgini;
45028 0 : }
45029 :
45030 :
45031 : /*************************************************************************
45032 : This function tells the decision forest construction algorithm to use the
45033 : permutation variable importance estimator (also known as MDA).
45034 :
45035 : This version of the importance estimation algorithm analyzes the mean
45036 : increase in the out-of-bag sum of squared residuals after a random
45037 : permutation of the J-th variable. The result is divided by the error computed
45038 : with all variables being perturbed, to produce an R-squared-like estimate in the [0,1] range.
45039 :
45040 : Such estimates are slower to calculate than Gini-based ratings because
45041 : they need multiple inference runs for each of the variables being studied.
45042 :
45043 : ALGLIB uses a parallelized and highly optimized algorithm which analyzes
45044 : the path through the decision tree and handles most perturbations
45045 : in O(1) time; nevertheless, requesting MDA importances may increase forest
45046 : construction time by 10% to 200% (or more, if you have thousands of
45047 : variables).
45048 :
45049 : However, the MDA rating has the following benefits over Gini-based ones:
45050 : * no bias towards specific variable types
45051 : * ability to directly evaluate "absolute" importance of some variable at
45052 : "0 to 1" scale (contrary to Gini-based rating, which returns comparative
45053 : importances).
45054 :
45055 : NOTE: informally speaking, MDA (permutation importance) rating answers the
45056 : question "what part of the model predictive power is ruined by
45057 : permuting k-th variable?" while MDI tells us "what part of the model
45058 : predictive power was achieved due to usage of k-th variable".
45059 :
45060 : Thus, MDA rates each variable independently at "0 to 1" scale while
45061 : MDI (and OOB-MDI too) tends to divide "unit amount of importance"
45062 : between several important variables.
45063 :
45064 : If all variables are equally important, they will have the same
45065 : MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
45066 : However, roughly the same picture will be produced for the "all
45067 : variables provide information, no single one is critical" situation and
45068 : for the "all variables are critical, drop any one and everything is
45069 : ruined" situation.
45070 :
45071 : Contrary to that, MDA will rate a critical variable at ~1.0, while an
45072 : important but non-critical variable will receive a less-than-unit
45073 : rating.
45074 :
45075 : NOTE: quite often MDA and MDI return the same results. This generally
45076 : happens on problems with low test set error (a few percent at most)
45077 : and a training set large enough to avoid overfitting.
45078 :
45079 : The difference between MDA, MDI and OOB-MDI becomes important only
45080 : on "hard" tasks with high test set error and/or small training set.
45081 :
45082 : INPUT PARAMETERS:
45083 : S - decision forest builder object
45084 :
45085 : OUTPUT PARAMETERS:
45086 : S - decision forest builder object. Next call to the forest
45087 : construction function will produce:
45088 : * importance estimates in rep.varimportances field
45089 : * variable ranks in rep.topvars field
45090 :
45091 : -- ALGLIB --
45092 : Copyright 29.07.2019 by Bochkanov Sergey
45093 : *************************************************************************/
45094 0 : void dfbuildersetimportancepermutation(decisionforestbuilder* s,
45095 : ae_state *_state)
45096 : {
45097 :
45098 :
45099 0 : s->rdfimportance = dforest_needpermutation;
45100 0 : }
45101 :
45102 :
45103 : /*************************************************************************
45104 : This function tells the decision forest construction algorithm to skip
45105 : variable importance estimation.
45106 :
45107 : INPUT PARAMETERS:
45108 : S - decision forest builder object
45109 :
45110 : OUTPUT PARAMETERS:
45111 : S - decision forest builder object. Next call to the forest
45112 : construction function will result in forest being built
45113 : without variable importance estimation.
45114 :
45115 : -- ALGLIB --
45116 : Copyright 29.07.2019 by Bochkanov Sergey
45117 : *************************************************************************/
45118 0 : void dfbuildersetimportancenone(decisionforestbuilder* s,
45119 : ae_state *_state)
45120 : {
45121 :
45122 :
45123 0 : s->rdfimportance = 0;
45124 0 : }
45125 :
45126 :
45127 : /*************************************************************************
45128 : This function is an alias for dfbuilderpeekprogress(), left in ALGLIB for
45129 : backward compatibility reasons.
45130 :
45131 : -- ALGLIB --
45132 : Copyright 21.05.2018 by Bochkanov Sergey
45133 : *************************************************************************/
45134 0 : double dfbuildergetprogress(decisionforestbuilder* s, ae_state *_state)
45135 : {
45136 : double result;
45137 :
45138 :
45139 0 : result = dfbuilderpeekprogress(s, _state);
45140 0 : return result;
45141 : }
45142 :
45143 :
45144 : /*************************************************************************
45145 : This function is used to peek into the decision forest construction process
45146 : from another thread and get the current progress indicator.
45147 :
45148 : It returns a value in [0,1].
45149 :
45150 : INPUT PARAMETERS:
45151 : S - decision forest builder object used to build forest
45152 : in some other thread
45153 :
45154 : RESULT:
45155 : progress value, in [0,1]
45156 :
45157 : -- ALGLIB --
45158 : Copyright 21.05.2018 by Bochkanov Sergey
45159 : *************************************************************************/
45160 0 : double dfbuilderpeekprogress(decisionforestbuilder* s, ae_state *_state)
45161 : {
45162 : double result;
45163 :
45164 :
45165 0 : result = s->rdfprogress/ae_maxreal((double)(s->rdftotal), (double)(1), _state);
45166 0 : result = ae_maxreal(result, (double)(0), _state);
45167 0 : result = ae_minreal(result, (double)(1), _state);
45168 0 : return result;
45169 : }
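
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Polling construction progress from a monitoring thread while another
thread runs dfbuilderbuildrandomforest() on the same builder. A real
monitor would sleep between polls; the helper name is hypothetical.
*************************************************************************/
static void example_monitor_progress(decisionforestbuilder* s, ae_state *_state)
{
    double p;

    do
    {
        p = dfbuilderpeekprogress(s, _state); /* always within [0,1] */
        /* report p to the user, then sleep for a short while */
    }
    while( p<1.0 );
}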
45170 :
45171 :
45172 : /*************************************************************************
45173 : This subroutine builds a decision forest according to the current settings,
45174 : using the dataset internally stored in the builder object. A dense algorithm is used.
45175 :
45176 : NOTE: this function uses the dense algorithm for forest construction
45177 : regardless of the dataset format (dense or sparse).
45178 :
45179 : NOTE: a forest built with this function is stored in memory using 64-bit
45180 : data structures for offsets/indexes/split values. It is possible to
45181 : convert the forest into a more memory-efficient compressed binary
45182 : representation. Depending on the problem properties, 3.7x-5.7x
45183 : compression factors are possible.
45184 :
45185 : The downsides of compression are (a) a slight reduction in the model
45186 : accuracy and (b) a ~1.5x reduction in the inference speed (due to
45187 : the increased complexity of the storage format).
45188 :
45189 : See comments on dfbinarycompression() for more info.
45190 :
45191 : Default settings are used by the algorithm; you can tweak them with the
45192 : help of the following functions:
45193 : * dfbuildersetsubsampleratio() - to control the fraction of the dataset
45194 : used for subsampling
45195 : * dfbuildersetrndvars() - to control the number of variables randomly
45196 : chosen for decision rule creation
45197 :
45198 : ! COMMERCIAL EDITION OF ALGLIB:
45199 : !
45200 : ! The Commercial Edition of ALGLIB includes the following important
45201 : ! improvements of this function:
45202 : ! * a high-performance native backend with the same C# interface (C# version)
45203 : ! * multithreading support (C++ and C# versions)
45204 : !
45205 : ! We recommend that you read the 'Working with commercial version' section
45206 : ! of the ALGLIB Reference Manual in order to find out how to use the
45207 : ! performance-related features provided by the commercial edition of ALGLIB.
45208 :
45209 : INPUT PARAMETERS:
45210 : S - decision forest builder object
45211 : NTrees - NTrees>=1, number of trees to train
45212 :
45213 : OUTPUT PARAMETERS:
45214 : DF - decision forest. You can compress this forest to a more
45215 : compact 16-bit representation with dfbinarycompression()
45216 : Rep - report, see below for information on its fields.
45217 :
45218 : === report information produced by forest construction function ==========
45219 :
45220 : The decision forest training report includes the following information:
45221 : * training set errors
45222 : * out-of-bag estimates of errors
45223 : * variable importance ratings
45224 :
45225 : The following fields are used to store this information:
45226 : * training set errors are stored in rep.relclserror, rep.avgce, rep.rmserror,
45227 : rep.avgerror and rep.avgrelerror
45228 : * out-of-bag estimates of errors are stored in rep.oobrelclserror, rep.oobavgce,
45229 : rep.oobrmserror, rep.oobavgerror and rep.oobavgrelerror
45230 :
45231 : Variable importance reports, if requested by a dfbuildersetimportanceoobgini(),
45232 : dfbuildersetimportancetrngini() or dfbuildersetimportancepermutation()
45233 : call, are stored in:
45234 : * the rep.varimportances field, which stores importance ratings
45235 : * rep.topvars, which stores variable indexes ordered from the most important
45236 : to the least important ones
45237 :
45238 : You can find more information about the report fields in:
45239 : * comments on the dfreport structure
45240 : * comments on the dfbuildersetimportanceoobgini function
45241 : * comments on the dfbuildersetimportancetrngini function
45242 : * comments on the dfbuildersetimportancepermutation function
45243 :
45244 : -- ALGLIB --
45245 : Copyright 21.05.2018 by Bochkanov Sergey
45246 : *************************************************************************/
45247 0 : void dfbuilderbuildrandomforest(decisionforestbuilder* s,
45248 : ae_int_t ntrees,
45249 : decisionforest* df,
45250 : dfreport* rep,
45251 : ae_state *_state)
45252 : {
45253 : ae_frame _frame_block;
45254 : ae_int_t i;
45255 : ae_int_t j;
45256 : ae_int_t nvars;
45257 : ae_int_t nclasses;
45258 : ae_int_t npoints;
45259 : ae_int_t trnsize;
45260 : ae_int_t maxtreesize;
45261 : ae_int_t sessionseed;
45262 : dfworkbuf workbufseed;
45263 : dfvotebuf votebufseed;
45264 : dftreebuf treebufseed;
45265 :
45266 0 : ae_frame_make(_state, &_frame_block);
45267 0 : memset(&workbufseed, 0, sizeof(workbufseed));
45268 0 : memset(&votebufseed, 0, sizeof(votebufseed));
45269 0 : memset(&treebufseed, 0, sizeof(treebufseed));
45270 0 : _decisionforest_clear(df);
45271 0 : _dfreport_clear(rep);
45272 0 : _dfworkbuf_init(&workbufseed, _state, ae_true);
45273 0 : _dfvotebuf_init(&votebufseed, _state, ae_true);
45274 0 : _dftreebuf_init(&treebufseed, _state, ae_true);
45275 :
45276 0 : ae_assert(ntrees>=1, "DFBuilderBuildRandomForest: ntrees<1", _state);
45277 0 : dforest_cleanreport(s, rep, _state);
45278 0 : npoints = s->npoints;
45279 0 : nvars = s->nvars;
45280 0 : nclasses = s->nclasses;
45281 :
45282 : /*
45283 : * Set up progress counter
45284 : */
45285 0 : s->rdfprogress = 0;
45286 0 : s->rdftotal = ntrees*npoints;
45287 0 : if( s->rdfimportance==dforest_needpermutation )
45288 : {
45289 0 : s->rdftotal = s->rdftotal+ntrees*npoints;
45290 : }
45291 :
45292 : /*
45293 : * Quick exit for empty dataset
45294 : */
45295 0 : if( s->dstype==-1||npoints==0 )
45296 : {
45297 0 : ae_assert(dforest_leafnodewidth==2, "DFBuilderBuildRandomForest: integrity check failed", _state);
45298 0 : df->forestformat = dforest_dfuncompressedv0;
45299 0 : df->nvars = s->nvars;
45300 0 : df->nclasses = s->nclasses;
45301 0 : df->ntrees = 1;
45302 0 : df->bufsize = 1+dforest_leafnodewidth;
45303 0 : ae_vector_set_length(&df->trees, 1+dforest_leafnodewidth, _state);
45304 0 : df->trees.ptr.p_double[0] = (double)(1+dforest_leafnodewidth);
45305 0 : df->trees.ptr.p_double[1] = (double)(-1);
45306 0 : df->trees.ptr.p_double[2] = 0.0;
45307 0 : dfcreatebuffer(df, &df->buffer, _state);
45308 0 : ae_frame_leave(_state);
45309 0 : return;
45310 : }
45311 0 : ae_assert(npoints>0, "DFBuilderBuildRandomForest: integrity check failed", _state);
45312 :
45313 : /*
45314 : * Analyze dataset statistics, perform preprocessing
45315 : */
45316 0 : dforest_analyzeandpreprocessdataset(s, _state);
45317 :
45318 : /*
45319 : * Prepare "work", "vote" and "tree" pools and other settings
45320 : */
45321 0 : trnsize = ae_round(npoints*s->rdfratio, _state);
45322 0 : trnsize = ae_maxint(trnsize, 1, _state);
45323 0 : trnsize = ae_minint(trnsize, npoints, _state);
45324 0 : maxtreesize = 1+dforest_innernodewidth*(trnsize-1)+dforest_leafnodewidth*trnsize;
45325 0 : ae_vector_set_length(&workbufseed.varpool, nvars, _state);
45326 0 : ae_vector_set_length(&workbufseed.trnset, trnsize, _state);
45327 0 : ae_vector_set_length(&workbufseed.oobset, npoints-trnsize, _state);
45328 0 : ae_vector_set_length(&workbufseed.tmp0i, npoints, _state);
45329 0 : ae_vector_set_length(&workbufseed.tmp1i, npoints, _state);
45330 0 : ae_vector_set_length(&workbufseed.tmp0r, npoints, _state);
45331 0 : ae_vector_set_length(&workbufseed.tmp1r, npoints, _state);
45332 0 : ae_vector_set_length(&workbufseed.tmp2r, npoints, _state);
45333 0 : ae_vector_set_length(&workbufseed.tmp3r, npoints, _state);
45334 0 : ae_vector_set_length(&workbufseed.trnlabelsi, npoints, _state);
45335 0 : ae_vector_set_length(&workbufseed.trnlabelsr, npoints, _state);
45336 0 : ae_vector_set_length(&workbufseed.ooblabelsi, npoints, _state);
45337 0 : ae_vector_set_length(&workbufseed.ooblabelsr, npoints, _state);
45338 0 : ae_vector_set_length(&workbufseed.curvals, npoints, _state);
45339 0 : ae_vector_set_length(&workbufseed.bestvals, npoints, _state);
45340 0 : ae_vector_set_length(&workbufseed.classpriors, nclasses, _state);
45341 0 : ae_vector_set_length(&workbufseed.classtotals0, nclasses, _state);
45342 0 : ae_vector_set_length(&workbufseed.classtotals1, nclasses, _state);
45343 0 : ae_vector_set_length(&workbufseed.classtotals01, 2*nclasses, _state);
45344 0 : ae_vector_set_length(&workbufseed.treebuf, maxtreesize, _state);
45345 0 : workbufseed.trnsize = trnsize;
45346 0 : workbufseed.oobsize = npoints-trnsize;
45347 0 : ae_vector_set_length(&votebufseed.trntotals, npoints*nclasses, _state);
45348 0 : ae_vector_set_length(&votebufseed.oobtotals, npoints*nclasses, _state);
45349 0 : for(i=0; i<=npoints*nclasses-1; i++)
45350 : {
45351 0 : votebufseed.trntotals.ptr.p_double[i] = (double)(0);
45352 0 : votebufseed.oobtotals.ptr.p_double[i] = (double)(0);
45353 : }
45354 0 : ae_vector_set_length(&votebufseed.trncounts, npoints, _state);
45355 0 : ae_vector_set_length(&votebufseed.oobcounts, npoints, _state);
45356 0 : for(i=0; i<=npoints-1; i++)
45357 : {
45358 0 : votebufseed.trncounts.ptr.p_int[i] = 0;
45359 0 : votebufseed.oobcounts.ptr.p_int[i] = 0;
45360 : }
45361 0 : ae_vector_set_length(&votebufseed.giniimportances, nvars, _state);
45362 0 : for(i=0; i<=nvars-1; i++)
45363 : {
45364 0 : votebufseed.giniimportances.ptr.p_double[i] = 0.0;
45365 : }
45366 0 : treebufseed.treeidx = -1;
45367 0 : ae_shared_pool_set_seed(&s->workpool, &workbufseed, sizeof(workbufseed), _dfworkbuf_init, _dfworkbuf_init_copy, _dfworkbuf_destroy, _state);
45368 0 : ae_shared_pool_set_seed(&s->votepool, &votebufseed, sizeof(votebufseed), _dfvotebuf_init, _dfvotebuf_init_copy, _dfvotebuf_destroy, _state);
45369 0 : ae_shared_pool_set_seed(&s->treepool, &treebufseed, sizeof(treebufseed), _dftreebuf_init, _dftreebuf_init_copy, _dftreebuf_destroy, _state);
45370 0 : ae_shared_pool_set_seed(&s->treefactory, &treebufseed, sizeof(treebufseed), _dftreebuf_init, _dftreebuf_init_copy, _dftreebuf_destroy, _state);
45371 :
45372 : /*
45373 : * Select session seed (individual trees are constructed using
45374 : * combination of session and local seeds).
45375 : */
45376 0 : sessionseed = s->rdfglobalseed;
45377 0 : if( s->rdfglobalseed<=0 )
45378 : {
45379 0 : sessionseed = ae_randominteger(30000, _state);
45380 : }
45381 :
45382 : /*
45383 : * Prepare In-and-Out-of-Bag matrix, if needed
45384 : */
45385 0 : s->neediobmatrix = s->rdfimportance==dforest_needpermutation;
45386 0 : if( s->neediobmatrix )
45387 : {
45388 :
45389 : /*
45390 : * Prepare default state of In-and-Out-of-Bag matrix
45391 : */
45392 0 : bmatrixsetlengthatleast(&s->iobmatrix, ntrees, npoints, _state);
45393 0 : for(i=0; i<=ntrees-1; i++)
45394 : {
45395 0 : for(j=0; j<=npoints-1; j++)
45396 : {
45397 0 : s->iobmatrix.ptr.pp_bool[i][j] = ae_false;
45398 : }
45399 : }
45400 : }
45401 :
45402 : /*
45403 : * Build trees (in parallel, if possible)
45404 : */
45405 0 : dforest_buildrandomtree(s, 0, ntrees, _state);
45406 :
45407 : /*
45408 : * Merge trees and output result
45409 : */
45410 0 : dforest_mergetrees(s, df, _state);
45411 :
45412 : /*
45413 : * Process voting results and output training set and OOB errors.
45414 : * Finalize tree construction.
45415 : */
45416 0 : dforest_processvotingresults(s, ntrees, &votebufseed, rep, _state);
45417 0 : dfcreatebuffer(df, &df->buffer, _state);
45418 :
45419 : /*
45420 : * Perform variable importance estimation
45421 : */
45422 0 : dforest_estimatevariableimportance(s, sessionseed, df, ntrees, rep, _state);
45423 :
45424 : /*
45425 : * Update progress counter
45426 : */
45427 0 : s->rdfprogress = s->rdftotal;
45428 0 : ae_frame_leave(_state);
45429 : }
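
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

A complete build workflow mirroring the sequence used by dfbuildinternal()
below: create a builder, attach the dataset, tweak subsampling and the
random variable count, then train. The 0.5 ratio, the seed and NVars/3
are merely illustrative values; the helper name is hypothetical.
*************************************************************************/
static void example_build_forest(/* Real */ ae_matrix* xy,
     ae_int_t npoints,
     ae_int_t nvars,
     ae_int_t nclasses,
     decisionforest* df,
     dfreport* rep,
     ae_state *_state)
{
    ae_frame _frame_block;
    decisionforestbuilder builder;

    ae_frame_make(_state, &_frame_block);
    memset(&builder, 0, sizeof(builder));
    _decisionforestbuilder_init(&builder, _state, ae_true);

    dfbuildercreate(&builder, _state);
    dfbuildersetdataset(&builder, xy, npoints, nvars, nclasses, _state);
    dfbuildersetsubsampleratio(&builder, 0.5, _state);   /* 50% bagging   */
    dfbuildersetrndvars(&builder, ae_maxint(nvars/3, 1, _state), _state);
    dfbuildersetseed(&builder, 117, _state);             /* reproducible  */
    dfbuilderbuildrandomforest(&builder, 100, df, rep, _state);
    ae_frame_leave(_state);
}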
45430 :
45431 :
45432 : /*************************************************************************
45433 : This function performs binary compression of the decision forest.
45434 :
45435 : The original decision forest produced by the forest builder is stored using
45436 : a 64-bit representation for all numbers: offsets, variable indexes, split
45437 : points.
45438 :
45439 : It is possible to significantly reduce the model size by means of:
45440 : * using compressed dynamic encoding for integers (offsets and variable
45441 : indexes), which uses just 1 byte to store small ints (less than 128),
45442 : just 2 bytes for larger values (less than 128^2) and so on
45443 : * storing floating point numbers using an 8-bit exponent and a 16-bit mantissa
45444 :
45445 : As a result, the model needs significantly less memory (the compression
45446 : factor depends on the variable and class counts). In particular:
45447 : * NVars<128 and NClasses<128 result in a 4.4x-5.7x model size reduction
45448 : * NVars<16384 and NClasses<128 result in a 3.7x-4.5x model size reduction
45449 :
45450 : Such a storage format performs lossless compression of all integers, but
45451 : compression of floating point values (split values) is lossy, with roughly
45452 : 0.01% relative error introduced during rounding. Thus, we recommend that you
45453 : re-evaluate model accuracy after compression.
45454 :
45455 : Another downside of compression is a ~1.5x reduction in inference
45456 : speed due to the need to dynamically decompress the compressed model.
45457 :
45458 : INPUT PARAMETERS:
45459 : DF - decision forest built by forest builder
45460 :
45461 : OUTPUT PARAMETERS:
45462 : DF - replaced by compressed forest
45463 :
45464 : RESULT:
45465 : compression factor (in-RAM size of the uncompressed model vs. that of the
45466 : compressed one), a positive number larger than 1.0
45467 :
45468 : -- ALGLIB --
45469 : Copyright 22.07.2019 by Bochkanov Sergey
45470 : *************************************************************************/
45471 0 : double dfbinarycompression(decisionforest* df, ae_state *_state)
45472 : {
45473 : double result;
45474 :
45475 :
45476 0 : result = dforest_binarycompression(df, ae_false, _state);
45477 0 : return result;
45478 : }
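
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Compressing a trained forest in place and re-checking its accuracy on a
test set, as recommended above (float compression is slightly lossy).
The helper name is hypothetical.
*************************************************************************/
static double example_compress_and_recheck(decisionforest* df,
     /* Real */ ae_matrix* xytest,
     ae_int_t ntest,
     ae_state *_state)
{
    double factor;
    double rmsbefore;
    double rmsafter;

    rmsbefore = dfrmserror(df, xytest, ntest, _state);
    factor = dfbinarycompression(df, _state);  /* in-place, returns >1.0 */
    rmsafter = dfrmserror(df, xytest, ntest, _state);

    /* rmsafter may differ slightly from rmsbefore because split values
       are stored lossily; factor is typically in the 3.7x-5.7x range   */
    return factor;
}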
45479 :
45480 :
45481 : /*************************************************************************
45482 : This is an 8-bit version of dfbinarycompression.
45483 : It is not recommended for external use because it is too lossy.
45484 :
45485 : -- ALGLIB --
45486 : Copyright 22.07.2019 by Bochkanov Sergey
45487 : *************************************************************************/
45488 0 : double dfbinarycompression8(decisionforest* df, ae_state *_state)
45489 : {
45490 : double result;
45491 :
45492 :
45493 0 : result = dforest_binarycompression(df, ae_true, _state);
45494 0 : return result;
45495 : }
45496 :
45497 :
45498 : /*************************************************************************
45499 : Inference using decision forest
45500 :
45501 : IMPORTANT: this function is thread-unsafe and may modify the internal
45502 : structures of the model! You cannot use the same model object for
45503 : parallel evaluation from several threads.
45504 :
45505 : Use dftsprocess() with independent thread-local buffers if
45506 : you need thread-safe evaluation.
45507 :
45508 : INPUT PARAMETERS:
45509 : DF - decision forest model
45510 : X - input vector, array[NVars]
45511 : Y - possibly preallocated buffer, reallocated if too small
45512 :
45513 : OUTPUT PARAMETERS:
45514 : Y - result. Regression estimate when solving regression task,
45515 : vector of posterior probabilities for classification task.
45516 :
45517 : See also DFProcessI.
45518 :
45519 :
45520 : -- ALGLIB --
45521 : Copyright 16.02.2009 by Bochkanov Sergey
45522 : *************************************************************************/
45523 0 : void dfprocess(decisionforest* df,
45524 : /* Real */ ae_vector* x,
45525 : /* Real */ ae_vector* y,
45526 : ae_state *_state)
45527 : {
45528 : ae_int_t offs;
45529 : ae_int_t i;
45530 : double v;
45531 : ae_int_t treesize;
45532 : ae_bool processed;
45533 :
45534 :
45535 :
45536 : /*
45537 : * Process
45538 : *
45539 : * Although comments above warn you about thread-unsafety of this
45540 : * function, it is de facto thread-safe. However, thread safety is
45541 : * an accidental side-effect of the specific inference algorithm
45542 : * being used. It may disappear in the future versions of the DF
45543 : * models, so you should NOT rely on it.
45544 : */
45545 0 : if( y->cnt<df->nclasses )
45546 : {
45547 0 : ae_vector_set_length(y, df->nclasses, _state);
45548 : }
45549 0 : for(i=0; i<=df->nclasses-1; i++)
45550 : {
45551 0 : y->ptr.p_double[i] = (double)(0);
45552 : }
45553 0 : processed = ae_false;
45554 0 : if( df->forestformat==dforest_dfuncompressedv0 )
45555 : {
45556 :
45557 : /*
45558 : * Process trees stored in uncompressed format
45559 : */
45560 0 : offs = 0;
45561 0 : for(i=0; i<=df->ntrees-1; i++)
45562 : {
45563 0 : dforest_dfprocessinternaluncompressed(df, offs, offs+1, x, y, _state);
45564 0 : offs = offs+ae_round(df->trees.ptr.p_double[offs], _state);
45565 : }
45566 0 : processed = ae_true;
45567 : }
45568 0 : if( df->forestformat==dforest_dfcompressedv0 )
45569 : {
45570 :
45571 : /*
45572 : * Process trees stored in compressed format
45573 : */
45574 0 : offs = 0;
45575 0 : for(i=0; i<=df->ntrees-1; i++)
45576 : {
45577 0 : treesize = dforest_unstreamuint(&df->trees8, &offs, _state);
45578 0 : dforest_dfprocessinternalcompressed(df, offs, x, y, _state);
45579 0 : offs = offs+treesize;
45580 : }
45581 0 : processed = ae_true;
45582 : }
45583 0 : ae_assert(processed, "DFProcess: integrity check failed (unexpected format?)", _state);
45584 0 : v = (double)1/(double)df->ntrees;
45585 0 : ae_v_muld(&y->ptr.p_double[0], 1, ae_v_len(0,df->nclasses-1), v);
45586 0 : }
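
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Single-threaded inference with dfprocess(), following the vector
conventions used throughout this file. The helper name and the zeroed
input vector are merely illustrative.
*************************************************************************/
static void example_dfprocess(decisionforest* df, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;
    ae_vector y;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    memset(&x, 0, sizeof(x));
    memset(&y, 0, sizeof(y));
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
    ae_vector_set_length(&x, df->nvars, _state);
    for(i=0; i<=df->nvars-1; i++)
    {
        x.ptr.p_double[i] = 0.0;       /* fill with real feature values */
    }
    dfprocess(df, &x, &y, _state);     /* y is reallocated if too small */

    /* classification: y holds NClasses posterior probabilities;
       regression:     y[0] holds the prediction                        */
    ae_frame_leave(_state);
}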
45587 :
45588 :
45589 : /*************************************************************************
45590 : 'Interactive' variant of DFProcess for languages like Python which support
45591 : constructs like "Y = DFProcessI(DF,X)" and an interactive interpreter mode.
45592 :
45593 : This function allocates a new array on each call, so it is significantly
45594 : slower than its 'non-interactive' counterpart, but it is more convenient
45595 : when you call it from the command line.
45596 :
45597 : IMPORTANT: this function is thread-unsafe and may modify the internal
45598 : structures of the model! You cannot use the same model object for
45599 : parallel evaluation from several threads.
45600 :
45601 : Use dftsprocess() with independent thread-local buffers if
45602 : you need thread-safe evaluation.
45603 :
45604 : -- ALGLIB --
45605 : Copyright 28.02.2010 by Bochkanov Sergey
45606 : *************************************************************************/
45607 0 : void dfprocessi(decisionforest* df,
45608 : /* Real */ ae_vector* x,
45609 : /* Real */ ae_vector* y,
45610 : ae_state *_state)
45611 : {
45612 :
45613 0 : ae_vector_clear(y);
45614 :
45615 0 : dfprocess(df, x, y, _state);
45616 0 : }
45617 :
45618 :
45619 : /*************************************************************************
45620 : This function returns the first component of the inferred vector (i.e. the
45621 : one with index #0).
45622 :
45623 : It is a convenience wrapper for dfprocess() intended for either:
45624 : * 1-dimensional regression problems
45625 : * 2-class classification problems
45626 :
45627 : In the former case this function returns the inference result as a scalar,
45628 : which is definitely more convenient than wrapping it in a vector. In the
45629 : latter case it returns the probability of the object belonging to class #0.
45630 :
45631 : If you call it for anything other than the two cases above, it will work
45632 : as defined, i.e. return y[0], although it is of less use in such cases.
45633 :
45634 : IMPORTANT: this function is thread-unsafe and modifies the internal
45635 : structures of the model! You cannot use the same model object for
45636 : parallel evaluation from several threads.
45637 :
45638 : Use dftsprocess() with independent thread-local buffers, if
45639 : you need thread-safe evaluation.
45640 :
45641 : INPUT PARAMETERS:
45642 : Model - DF model
45643 : X - input vector, array[0..NVars-1].
45644 :
45645 : RESULT:
45646 : Y[0]
45647 :
45648 : -- ALGLIB --
45649 : Copyright 15.02.2019 by Bochkanov Sergey
45650 : *************************************************************************/
45651 0 : double dfprocess0(decisionforest* model,
45652 : /* Real */ ae_vector* x,
45653 : ae_state *_state)
45654 : {
45655 : ae_int_t i;
45656 : ae_int_t nvars;
45657 : double result;
45658 :
45659 :
45660 0 : nvars = model->nvars;
45661 0 : for(i=0; i<=nvars-1; i++)
45662 : {
45663 0 : model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
45664 : }
45665 0 : dfprocess(model, &model->buffer.x, &model->buffer.y, _state);
45666 0 : result = model->buffer.y.ptr.p_double[0];
45667 0 : return result;
45668 : }
45669 :
45670 :
45671 : /*************************************************************************
45672 : This function returns the most probable class number for an input X. It is
45673 : the same as calling dfprocess(model,x,y), then determining i=argmax(y[i])
45674 : and returning i.
45675 :
45676 : A class number in the [0,NOut) range is returned for classification
45677 : problems; -1 is returned when this function is called for regression problems.
45678 :
45679 : IMPORTANT: this function is thread-unsafe and modifies the internal
45680 : structures of the model! You cannot use the same model object for
45681 : parallel evaluation from several threads.
45682 :
45683 : Use dftsprocess() with independent thread-local buffers, if
45684 : you need thread-safe evaluation.
45685 :
45686 : INPUT PARAMETERS:
45687 : Model - decision forest model
45688 : X - input vector, array[0..NVars-1].
45689 :
45690 : RESULT:
45691 : class number, -1 for regression tasks
45692 :
45693 : -- ALGLIB --
45694 : Copyright 15.02.2019 by Bochkanov Sergey
45695 : *************************************************************************/
45696 0 : ae_int_t dfclassify(decisionforest* model,
45697 : /* Real */ ae_vector* x,
45698 : ae_state *_state)
45699 : {
45700 : ae_int_t i;
45701 : ae_int_t nvars;
45702 : ae_int_t nout;
45703 : ae_int_t result;
45704 :
45705 :
45706 0 : if( model->nclasses<2 )
45707 : {
45708 0 : result = -1;
45709 0 : return result;
45710 : }
45711 0 : nvars = model->nvars;
45712 0 : nout = model->nclasses;
45713 0 : for(i=0; i<=nvars-1; i++)
45714 : {
45715 0 : model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
45716 : }
45717 0 : dfprocess(model, &model->buffer.x, &model->buffer.y, _state);
45718 0 : result = 0;
45719 0 : for(i=1; i<=nout-1; i++)
45720 : {
45721 0 : if( model->buffer.y.ptr.p_double[i]>model->buffer.y.ptr.p_double[result] )
45722 : {
45723 0 : result = i;
45724 : }
45725 : }
45726 0 : return result;
45727 : }
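
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

The two convenience wrappers side by side: dfprocess0() for a scalar
output, dfclassify() for the argmax class. The helper name is
hypothetical.
*************************************************************************/
static ae_int_t example_scalar_wrappers(decisionforest* model,
     /* Real */ ae_vector* x,
     ae_state *_state)
{
    double p0;
    ae_int_t cls;

    p0 = dfprocess0(model, x, _state);   /* y[0]: class #0 probability or
                                            the scalar regression output  */
    cls = dfclassify(model, x, _state);  /* argmax class, -1 for regression */
    return cls;
}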
45728 :
45729 :
45730 : /*************************************************************************
45731 : Inference using decision forest
45732 :
45733 : Thread-safe processing using an external buffer for temporaries.
45734 :
45735 : This function is thread-safe (i.e. you can use the same DF model from
45736 : multiple threads) as long as you use different buffer objects for different
45737 : threads.
45738 :
45739 : INPUT PARAMETERS:
45740 : DF - decision forest model
45741 : Buf - buffer object, must be allocated specifically for this
45742 : model with dfcreatebuffer().
45743 : X - input vector, array[NVars]
45744 : Y - possibly preallocated buffer, reallocated if too small
45745 :
45746 : OUTPUT PARAMETERS:
45747 : Y - result. Regression estimate when solving regression task,
45748 : vector of posterior probabilities for classification task.
45749 :
45750 : See also DFProcessI.
45751 :
45752 :
45753 : -- ALGLIB --
45754 : Copyright 16.02.2009 by Bochkanov Sergey
45755 : *************************************************************************/
45756 0 : void dftsprocess(decisionforest* df,
45757 : decisionforestbuffer* buf,
45758 : /* Real */ ae_vector* x,
45759 : /* Real */ ae_vector* y,
45760 : ae_state *_state)
45761 : {
45762 :
45763 :
45764 :
45765 : /*
45766 : * Although docs warn you about thread-unsafety of the dfprocess()
45767 : * function, it is de facto thread-safe. However, thread safety is
45768 : * an accidental side-effect of the specific inference algorithm
45769 : * being used. It may disappear in the future versions of the DF
45770 : * models, so you should NOT rely on it.
45771 : */
45772 0 : dfprocess(df, x, y, _state);
45773 0 : }
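
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Thread-safe inference: each worker thread owns a decisionforestbuffer
created for this specific model, while the model itself is shared.
Assumes the usual _<type>_init() convention used elsewhere in this file;
the helper name is hypothetical.
*************************************************************************/
static void example_threadsafe_inference(decisionforest* df,
     /* Real */ ae_vector* x,
     /* Real */ ae_vector* y,
     ae_state *_state)
{
    ae_frame _frame_block;
    decisionforestbuffer buf;

    ae_frame_make(_state, &_frame_block);
    memset(&buf, 0, sizeof(buf));
    _decisionforestbuffer_init(&buf, _state, ae_true);

    dfcreatebuffer(df, &buf, _state);  /* per-thread buffer for this model */
    dftsprocess(df, &buf, x, y, _state);
    ae_frame_leave(_state);
}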
45774 :
45775 :
45776 : /*************************************************************************
45777 : Relative classification error on the test set
45778 :
45779 : INPUT PARAMETERS:
45780 : DF - decision forest model
45781 : XY - test set
45782 : NPoints - test set size
45783 :
45784 : RESULT:
45785 : fraction of incorrectly classified cases, in [0,1].
45786 : Zero if the model solves a regression task.
45787 :
45788 : -- ALGLIB --
45789 : Copyright 16.02.2009 by Bochkanov Sergey
45790 : *************************************************************************/
45791 0 : double dfrelclserror(decisionforest* df,
45792 : /* Real */ ae_matrix* xy,
45793 : ae_int_t npoints,
45794 : ae_state *_state)
45795 : {
45796 : double result;
45797 :
45798 :
45799 0 : result = (double)dforest_dfclserror(df, xy, npoints, _state)/(double)npoints;
45800 0 : return result;
45801 : }
45802 :
45803 :
45804 : /*************************************************************************
45805 : Average cross-entropy (in bits per element) on the test set
45806 :
45807 : INPUT PARAMETERS:
45808 : DF - decision forest model
45809 : XY - test set
45810 : NPoints - test set size
45811 :
45812 : RESULT:
45813 : CrossEntropy/(NPoints*LN(2)).
45814 : Zero if the model solves a regression task.
45815 :
45816 : -- ALGLIB --
45817 : Copyright 16.02.2009 by Bochkanov Sergey
45818 : *************************************************************************/
45819 0 : double dfavgce(decisionforest* df,
45820 : /* Real */ ae_matrix* xy,
45821 : ae_int_t npoints,
45822 : ae_state *_state)
45823 : {
45824 : ae_frame _frame_block;
45825 : ae_vector x;
45826 : ae_vector y;
45827 : ae_int_t i;
45828 : ae_int_t j;
45829 : ae_int_t k;
45830 : ae_int_t tmpi;
45831 : double result;
45832 :
45833 0 : ae_frame_make(_state, &_frame_block);
45834 0 : memset(&x, 0, sizeof(x));
45835 0 : memset(&y, 0, sizeof(y));
45836 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
45837 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
45838 :
45839 0 : ae_vector_set_length(&x, df->nvars-1+1, _state);
45840 0 : ae_vector_set_length(&y, df->nclasses-1+1, _state);
45841 0 : result = (double)(0);
45842 0 : for(i=0; i<=npoints-1; i++)
45843 : {
45844 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
45845 0 : dfprocess(df, &x, &y, _state);
45846 0 : if( df->nclasses>1 )
45847 : {
45848 :
45849 : /*
45850 : * classification-specific code
45851 : */
45852 0 : k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
45853 0 : tmpi = 0;
45854 0 : for(j=1; j<=df->nclasses-1; j++)
45855 : {
45856 0 : if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
45857 : {
45858 0 : tmpi = j;
45859 : }
45860 : }
45861 0 : if( ae_fp_neq(y.ptr.p_double[k],(double)(0)) )
45862 : {
45863 0 : result = result-ae_log(y.ptr.p_double[k], _state);
45864 : }
45865 : else
45866 : {
45867 0 : result = result-ae_log(ae_minrealnumber, _state);
45868 : }
45869 : }
45870 : }
45871 0 : result = result/npoints;
45872 0 : ae_frame_leave(_state);
45873 0 : return result;
45874 : }
45875 :
45876 :
45877 : /*************************************************************************
45878 : RMS error on the test set
45879 :
45880 : INPUT PARAMETERS:
45881 : DF - decision forest model
45882 : XY - test set
45883 : NPoints - test set size
45884 :
45885 : RESULT:
45886 : root mean square error.
45887 : Its meaning for regression tasks is obvious. For
45888 : classification tasks, RMS error means the error made when estimating
45889 : posterior probabilities.
45890 :
45891 : -- ALGLIB --
45892 : Copyright 16.02.2009 by Bochkanov Sergey
45893 : *************************************************************************/
45894 0 : double dfrmserror(decisionforest* df,
45895 : /* Real */ ae_matrix* xy,
45896 : ae_int_t npoints,
45897 : ae_state *_state)
45898 : {
45899 : ae_frame _frame_block;
45900 : ae_vector x;
45901 : ae_vector y;
45902 : ae_int_t i;
45903 : ae_int_t j;
45904 : ae_int_t k;
45905 : ae_int_t tmpi;
45906 : double result;
45907 :
45908 0 : ae_frame_make(_state, &_frame_block);
45909 0 : memset(&x, 0, sizeof(x));
45910 0 : memset(&y, 0, sizeof(y));
45911 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
45912 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
45913 :
45914 0 : ae_vector_set_length(&x, df->nvars-1+1, _state);
45915 0 : ae_vector_set_length(&y, df->nclasses-1+1, _state);
45916 0 : result = (double)(0);
45917 0 : for(i=0; i<=npoints-1; i++)
45918 : {
45919 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
45920 0 : dfprocess(df, &x, &y, _state);
45921 0 : if( df->nclasses>1 )
45922 : {
45923 :
45924 : /*
45925 : * classification-specific code
45926 : */
45927 0 : k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
45928 0 : tmpi = 0;
45929 0 : for(j=1; j<=df->nclasses-1; j++)
45930 : {
45931 0 : if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
45932 : {
45933 0 : tmpi = j;
45934 : }
45935 : }
45936 0 : for(j=0; j<=df->nclasses-1; j++)
45937 : {
45938 0 : if( j==k )
45939 : {
45940 0 : result = result+ae_sqr(y.ptr.p_double[j]-1, _state);
45941 : }
45942 : else
45943 : {
45944 0 : result = result+ae_sqr(y.ptr.p_double[j], _state);
45945 : }
45946 : }
45947 : }
45948 : else
45949 : {
45950 :
45951 : /*
45952 : * regression-specific code
45953 : */
45954 0 : result = result+ae_sqr(y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars], _state);
45955 : }
45956 : }
45957 0 : result = ae_sqrt(result/(npoints*df->nclasses), _state);
45958 0 : ae_frame_leave(_state);
45959 0 : return result;
45960 : }
45961 :
45962 :
45963 : /*************************************************************************
45964 : Average error on the test set
45965 :
45966 : INPUT PARAMETERS:
45967 : DF - decision forest model
45968 : XY - test set
45969 : NPoints - test set size
45970 :
45971 : RESULT:
45972 : Its meaning for regression tasks is obvious. For
45973 : classification tasks, it means the average error made when estimating
45974 : posterior probabilities.
45975 :
45976 : -- ALGLIB --
45977 : Copyright 16.02.2009 by Bochkanov Sergey
45978 : *************************************************************************/
45979 0 : double dfavgerror(decisionforest* df,
45980 : /* Real */ ae_matrix* xy,
45981 : ae_int_t npoints,
45982 : ae_state *_state)
45983 : {
45984 : ae_frame _frame_block;
45985 : ae_vector x;
45986 : ae_vector y;
45987 : ae_int_t i;
45988 : ae_int_t j;
45989 : ae_int_t k;
45990 : double result;
45991 :
45992 0 : ae_frame_make(_state, &_frame_block);
45993 0 : memset(&x, 0, sizeof(x));
45994 0 : memset(&y, 0, sizeof(y));
45995 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
45996 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
45997 :
45998 0 : ae_vector_set_length(&x, df->nvars-1+1, _state);
45999 0 : ae_vector_set_length(&y, df->nclasses-1+1, _state);
46000 0 : result = (double)(0);
46001 0 : for(i=0; i<=npoints-1; i++)
46002 : {
46003 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
46004 0 : dfprocess(df, &x, &y, _state);
46005 0 : if( df->nclasses>1 )
46006 : {
46007 :
46008 : /*
46009 : * classification-specific code
46010 : */
46011 0 : k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
46012 0 : for(j=0; j<=df->nclasses-1; j++)
46013 : {
46014 0 : if( j==k )
46015 : {
46016 0 : result = result+ae_fabs(y.ptr.p_double[j]-1, _state);
46017 : }
46018 : else
46019 : {
46020 0 : result = result+ae_fabs(y.ptr.p_double[j], _state);
46021 : }
46022 : }
46023 : }
46024 : else
46025 : {
46026 :
46027 : /*
46028 : * regression-specific code
46029 : */
46030 0 : result = result+ae_fabs(y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars], _state);
46031 : }
46032 : }
46033 0 : result = result/(npoints*df->nclasses);
46034 0 : ae_frame_leave(_state);
46035 0 : return result;
46036 : }
46037 :
46038 :
46039 : /*************************************************************************
46040 : Average relative error on the test set
46041 :
46042 : INPUT PARAMETERS:
46043 : DF - decision forest model
46044 : XY - test set
46045 : NPoints - test set size
46046 :
46047 : RESULT:
46048 : Its meaning for regression tasks is obvious. For
46049 : classification tasks, it means the average relative error made when
46050 : estimating the posterior probability of belonging to the correct class.
46051 :
46052 : -- ALGLIB --
46053 : Copyright 16.02.2009 by Bochkanov Sergey
46054 : *************************************************************************/
46055 0 : double dfavgrelerror(decisionforest* df,
46056 : /* Real */ ae_matrix* xy,
46057 : ae_int_t npoints,
46058 : ae_state *_state)
46059 : {
46060 : ae_frame _frame_block;
46061 : ae_vector x;
46062 : ae_vector y;
46063 : ae_int_t relcnt;
46064 : ae_int_t i;
46065 : ae_int_t j;
46066 : ae_int_t k;
46067 : double result;
46068 :
46069 0 : ae_frame_make(_state, &_frame_block);
46070 0 : memset(&x, 0, sizeof(x));
46071 0 : memset(&y, 0, sizeof(y));
46072 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
46073 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
46074 :
46075 0 : ae_vector_set_length(&x, df->nvars-1+1, _state);
46076 0 : ae_vector_set_length(&y, df->nclasses-1+1, _state);
46077 0 : result = (double)(0);
46078 0 : relcnt = 0;
46079 0 : for(i=0; i<=npoints-1; i++)
46080 : {
46081 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
46082 0 : dfprocess(df, &x, &y, _state);
46083 0 : if( df->nclasses>1 )
46084 : {
46085 :
46086 : /*
46087 : * classification-specific code
46088 : */
46089 0 : k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
46090 0 : for(j=0; j<=df->nclasses-1; j++)
46091 : {
46092 0 : if( j==k )
46093 : {
46094 0 : result = result+ae_fabs(y.ptr.p_double[j]-1, _state);
46095 0 : relcnt = relcnt+1;
46096 : }
46097 : }
46098 : }
46099 : else
46100 : {
46101 :
46102 : /*
46103 : * regression-specific code
46104 : */
46105 0 : if( ae_fp_neq(xy->ptr.pp_double[i][df->nvars],(double)(0)) )
46106 : {
46107 0 : result = result+ae_fabs((y.ptr.p_double[0]-xy->ptr.pp_double[i][df->nvars])/xy->ptr.pp_double[i][df->nvars], _state);
46108 0 : relcnt = relcnt+1;
46109 : }
46110 : }
46111 : }
46112 0 : if( relcnt>0 )
46113 : {
46114 0 : result = result/relcnt;
46115 : }
46116 0 : ae_frame_leave(_state);
46117 0 : return result;
46118 : }
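
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Computing all five test-set metrics defined above on one dataset. The
helper name is hypothetical; for regression models the classification
metrics degenerate as documented (e.g. dfrelclserror() returns zero).
*************************************************************************/
static void example_test_metrics(decisionforest* df,
     /* Real */ ae_matrix* xytest,
     ae_int_t ntest,
     ae_state *_state)
{
    double relcls;
    double avgce;
    double rms;
    double avg;
    double avgrel;

    relcls = dfrelclserror(df, xytest, ntest, _state); /* share misclassified */
    avgce  = dfavgce(df, xytest, ntest, _state);       /* bits per element    */
    rms    = dfrmserror(df, xytest, ntest, _state);
    avg    = dfavgerror(df, xytest, ntest, _state);
    avgrel = dfavgrelerror(df, xytest, ntest, _state);
}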
46119 :
46120 :
46121 : /*************************************************************************
46122 : Copying of the DecisionForest structure
46123 :
46124 : INPUT PARAMETERS:
46125 : DF1 - original
46126 :
46127 : OUTPUT PARAMETERS:
46128 : DF2 - copy
46129 :
46130 : -- ALGLIB --
46131 : Copyright 13.02.2009 by Bochkanov Sergey
46132 : *************************************************************************/
46133 0 : void dfcopy(decisionforest* df1, decisionforest* df2, ae_state *_state)
46134 : {
46135 : ae_int_t i;
46136 : ae_int_t bufsize;
46137 :
46138 0 : _decisionforest_clear(df2);
46139 :
46140 0 : if( df1->forestformat==dforest_dfuncompressedv0 )
46141 : {
46142 0 : df2->forestformat = df1->forestformat;
46143 0 : df2->nvars = df1->nvars;
46144 0 : df2->nclasses = df1->nclasses;
46145 0 : df2->ntrees = df1->ntrees;
46146 0 : df2->bufsize = df1->bufsize;
46147 0 : ae_vector_set_length(&df2->trees, df1->bufsize, _state);
46148 0 : ae_v_move(&df2->trees.ptr.p_double[0], 1, &df1->trees.ptr.p_double[0], 1, ae_v_len(0,df1->bufsize-1));
46149 0 : dfcreatebuffer(df2, &df2->buffer, _state);
46150 0 : return;
46151 : }
46152 0 : if( df1->forestformat==dforest_dfcompressedv0 )
46153 : {
46154 0 : df2->forestformat = df1->forestformat;
46155 0 : df2->usemantissa8 = df1->usemantissa8;
46156 0 : df2->nvars = df1->nvars;
46157 0 : df2->nclasses = df1->nclasses;
46158 0 : df2->ntrees = df1->ntrees;
46159 0 : bufsize = df1->trees8.cnt;
46160 0 : ae_vector_set_length(&(df2->trees8), bufsize, _state);
46161 0 : for(i=0; i<=bufsize-1; i++)
46162 : {
46163 0 : df2->trees8.ptr.p_ubyte[i] = (unsigned char)(df1->trees8.ptr.p_ubyte[i]);
46164 : }
46165 0 : dfcreatebuffer(df2, &df2->buffer, _state);
46166 0 : return;
46167 : }
46168 0 : ae_assert(ae_false, "DFCopy: unexpected forest format", _state);
46169 : }
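
/*************************************************************************
ADDED EXAMPLE (editor's sketch, not part of the original ALGLIB sources).

Deep-copying a model with dfcopy(), which handles both the uncompressed
and the compressed forest formats. Assumes the usual _<type>_init()
convention; the helper name is hypothetical.
*************************************************************************/
static void example_dfcopy(decisionforest* df1, ae_state *_state)
{
    ae_frame _frame_block;
    decisionforest df2;

    ae_frame_make(_state, &_frame_block);
    memset(&df2, 0, sizeof(df2));
    _decisionforest_init(&df2, _state, ae_true);

    dfcopy(df1, &df2, _state);   /* df2 is an independent copy of df1 */
    ae_frame_leave(_state);
}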
46170 :
46171 :
46172 : /*************************************************************************
46173 : Serializer: allocation
46174 :
46175 : -- ALGLIB --
46176 : Copyright 14.03.2011 by Bochkanov Sergey
46177 : *************************************************************************/
46178 0 : void dfalloc(ae_serializer* s, decisionforest* forest, ae_state *_state)
46179 : {
46180 :
46181 :
46182 0 : if( forest->forestformat==dforest_dfuncompressedv0 )
46183 : {
46184 0 : ae_serializer_alloc_entry(s);
46185 0 : ae_serializer_alloc_entry(s);
46186 0 : ae_serializer_alloc_entry(s);
46187 0 : ae_serializer_alloc_entry(s);
46188 0 : ae_serializer_alloc_entry(s);
46189 0 : ae_serializer_alloc_entry(s);
46190 0 : allocrealarray(s, &forest->trees, forest->bufsize, _state);
46191 0 : return;
46192 : }
46193 0 : if( forest->forestformat==dforest_dfcompressedv0 )
46194 : {
46195 0 : ae_serializer_alloc_entry(s);
46196 0 : ae_serializer_alloc_entry(s);
46197 0 : ae_serializer_alloc_entry(s);
46198 0 : ae_serializer_alloc_entry(s);
46199 0 : ae_serializer_alloc_entry(s);
46200 0 : ae_serializer_alloc_entry(s);
46201 0 : ae_serializer_alloc_byte_array(s, &forest->trees8);
46202 0 : return;
46203 : }
46204 0 : ae_assert(ae_false, "DFAlloc: unexpected forest format", _state);
46205 : }
46206 :
46207 :
46208 : /*************************************************************************
46209 : Serializer: serialization
46210 :
46211 : -- ALGLIB --
46212 : Copyright 14.03.2011 by Bochkanov Sergey
46213 : *************************************************************************/
46214 0 : void dfserialize(ae_serializer* s,
46215 : decisionforest* forest,
46216 : ae_state *_state)
46217 : {
46218 :
46219 :
46220 0 : if( forest->forestformat==dforest_dfuncompressedv0 )
46221 : {
46222 0 : ae_serializer_serialize_int(s, getrdfserializationcode(_state), _state);
46223 0 : ae_serializer_serialize_int(s, dforest_dfuncompressedv0, _state);
46224 0 : ae_serializer_serialize_int(s, forest->nvars, _state);
46225 0 : ae_serializer_serialize_int(s, forest->nclasses, _state);
46226 0 : ae_serializer_serialize_int(s, forest->ntrees, _state);
46227 0 : ae_serializer_serialize_int(s, forest->bufsize, _state);
46228 0 : serializerealarray(s, &forest->trees, forest->bufsize, _state);
46229 0 : return;
46230 : }
46231 0 : if( forest->forestformat==dforest_dfcompressedv0 )
46232 : {
46233 0 : ae_serializer_serialize_int(s, getrdfserializationcode(_state), _state);
46234 0 : ae_serializer_serialize_int(s, forest->forestformat, _state);
46235 0 : ae_serializer_serialize_bool(s, forest->usemantissa8, _state);
46236 0 : ae_serializer_serialize_int(s, forest->nvars, _state);
46237 0 : ae_serializer_serialize_int(s, forest->nclasses, _state);
46238 0 : ae_serializer_serialize_int(s, forest->ntrees, _state);
46239 0 : ae_serializer_serialize_byte_array(s, &forest->trees8, _state);
46240 0 : return;
46241 : }
46242 0 : ae_assert(ae_false, "DFSerialize: unexpected forest format", _state);
46243 : }
46244 :
46245 :
46246 : /*************************************************************************
46247 : Serializer: unserialization
46248 :
46249 : -- ALGLIB --
46250 : Copyright 14.03.2011 by Bochkanov Sergey
46251 : *************************************************************************/
46252 0 : void dfunserialize(ae_serializer* s,
46253 : decisionforest* forest,
46254 : ae_state *_state)
46255 : {
46256 : ae_int_t i0;
46257 : ae_int_t forestformat;
46258 : ae_bool processed;
46259 :
46260 0 : _decisionforest_clear(forest);
46261 :
46262 :
46263 : /*
46264 : * check correctness of header
46265 : */
46266 0 : ae_serializer_unserialize_int(s, &i0, _state);
46267 0 : ae_assert(i0==getrdfserializationcode(_state), "DFUnserialize: stream header corrupted", _state);
46268 :
46269 : /*
46270 : * Read forest
46271 : */
46272 0 : ae_serializer_unserialize_int(s, &forestformat, _state);
46273 0 : processed = ae_false;
46274 0 : if( forestformat==dforest_dfuncompressedv0 )
46275 : {
46276 :
46277 : /*
46278 : * Unserialize data
46279 : */
46280 0 : forest->forestformat = forestformat;
46281 0 : ae_serializer_unserialize_int(s, &forest->nvars, _state);
46282 0 : ae_serializer_unserialize_int(s, &forest->nclasses, _state);
46283 0 : ae_serializer_unserialize_int(s, &forest->ntrees, _state);
46284 0 : ae_serializer_unserialize_int(s, &forest->bufsize, _state);
46285 0 : unserializerealarray(s, &forest->trees, _state);
46286 0 : processed = ae_true;
46287 : }
46288 0 : if( forestformat==dforest_dfcompressedv0 )
46289 : {
46290 :
46291 : /*
46292 : * Unserialize data
46293 : */
46294 0 : forest->forestformat = forestformat;
46295 0 : ae_serializer_unserialize_bool(s, &forest->usemantissa8, _state);
46296 0 : ae_serializer_unserialize_int(s, &forest->nvars, _state);
46297 0 : ae_serializer_unserialize_int(s, &forest->nclasses, _state);
46298 0 : ae_serializer_unserialize_int(s, &forest->ntrees, _state);
46299 0 : ae_serializer_unserialize_byte_array(s, &forest->trees8, _state);
46300 0 : processed = ae_true;
46301 : }
46302 0 : ae_assert(processed, "DFUnserialize: unexpected forest format", _state);
46303 :
46304 : /*
46305 : * Prepare buffer
46306 : */
46307 0 : dfcreatebuffer(forest, &forest->buffer, _state);
46308 0 : }
46309 :
46310 :
46311 : /*************************************************************************
46312 : This subroutine builds a random decision forest.
46313 :
46314 : --------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------
46315 :
46316 : -- ALGLIB --
46317 : Copyright 19.02.2009 by Bochkanov Sergey
46318 : *************************************************************************/
46319 0 : void dfbuildrandomdecisionforest(/* Real */ ae_matrix* xy,
46320 : ae_int_t npoints,
46321 : ae_int_t nvars,
46322 : ae_int_t nclasses,
46323 : ae_int_t ntrees,
46324 : double r,
46325 : ae_int_t* info,
46326 : decisionforest* df,
46327 : dfreport* rep,
46328 : ae_state *_state)
46329 : {
46330 : ae_int_t samplesize;
46331 :
46332 0 : *info = 0;
46333 0 : _decisionforest_clear(df);
46334 0 : _dfreport_clear(rep);
46335 :
46336 0 : if( ae_fp_less_eq(r,(double)(0))||ae_fp_greater(r,(double)(1)) )
46337 : {
46338 0 : *info = -1;
46339 0 : return;
46340 : }
46341 0 : samplesize = ae_maxint(ae_round(r*npoints, _state), 1, _state);
46342 0 : dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, ae_maxint(nvars/2, 1, _state), dforest_dfusestrongsplits+dforest_dfuseevs, info, df, rep, _state);
46343 : }
46344 :
46345 :
46346 : /*************************************************************************
46347 : This subroutine builds a random decision forest.
46348 :
46349 : --------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------
46350 :
46351 : -- ALGLIB --
46352 : Copyright 19.02.2009 by Bochkanov Sergey
46353 : *************************************************************************/
46354 0 : void dfbuildrandomdecisionforestx1(/* Real */ ae_matrix* xy,
46355 : ae_int_t npoints,
46356 : ae_int_t nvars,
46357 : ae_int_t nclasses,
46358 : ae_int_t ntrees,
46359 : ae_int_t nrndvars,
46360 : double r,
46361 : ae_int_t* info,
46362 : decisionforest* df,
46363 : dfreport* rep,
46364 : ae_state *_state)
46365 : {
46366 : ae_int_t samplesize;
46367 :
46368 0 : *info = 0;
46369 0 : _decisionforest_clear(df);
46370 0 : _dfreport_clear(rep);
46371 :
46372 0 : if( ae_fp_less_eq(r,(double)(0))||ae_fp_greater(r,(double)(1)) )
46373 : {
46374 0 : *info = -1;
46375 0 : return;
46376 : }
46377 0 : if( nrndvars<=0||nrndvars>nvars )
46378 : {
46379 0 : *info = -1;
46380 0 : return;
46381 : }
46382 0 : samplesize = ae_maxint(ae_round(r*npoints, _state), 1, _state);
46383 0 : dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, nrndvars, dforest_dfusestrongsplits+dforest_dfuseevs, info, df, rep, _state);
46384 : }
46385 :
46386 :
46387 0 : void dfbuildinternal(/* Real */ ae_matrix* xy,
46388 : ae_int_t npoints,
46389 : ae_int_t nvars,
46390 : ae_int_t nclasses,
46391 : ae_int_t ntrees,
46392 : ae_int_t samplesize,
46393 : ae_int_t nfeatures,
46394 : ae_int_t flags,
46395 : ae_int_t* info,
46396 : decisionforest* df,
46397 : dfreport* rep,
46398 : ae_state *_state)
46399 : {
46400 : ae_frame _frame_block;
46401 : decisionforestbuilder builder;
46402 : ae_int_t i;
46403 :
46404 0 : ae_frame_make(_state, &_frame_block);
46405 0 : memset(&builder, 0, sizeof(builder));
46406 0 : *info = 0;
46407 0 : _decisionforest_clear(df);
46408 0 : _dfreport_clear(rep);
46409 0 : _decisionforestbuilder_init(&builder, _state, ae_true);
46410 :
46411 :
46412 : /*
46413 : * Validate inputs
46414 : */
46415 0 : if( (((((npoints<1||samplesize<1)||samplesize>npoints)||nvars<1)||nclasses<1)||ntrees<1)||nfeatures<1 )
46416 : {
46417 0 : *info = -1;
46418 0 : ae_frame_leave(_state);
46419 0 : return;
46420 : }
46421 0 : if( nclasses>1 )
46422 : {
46423 0 : for(i=0; i<=npoints-1; i++)
46424 : {
46425 0 : if( ae_round(xy->ptr.pp_double[i][nvars], _state)<0||ae_round(xy->ptr.pp_double[i][nvars], _state)>=nclasses )
46426 : {
46427 0 : *info = -2;
46428 0 : ae_frame_leave(_state);
46429 0 : return;
46430 : }
46431 : }
46432 : }
46433 0 : *info = 1;
46434 0 : dfbuildercreate(&builder, _state);
46435 0 : dfbuildersetdataset(&builder, xy, npoints, nvars, nclasses, _state);
46436 0 : dfbuildersetsubsampleratio(&builder, (double)samplesize/(double)npoints, _state);
46437 0 : dfbuildersetrndvars(&builder, nfeatures, _state);
46438 0 : dfbuilderbuildrandomforest(&builder, ntrees, df, rep, _state);
46439 0 : ae_frame_leave(_state);
46440 : }
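/*************************************************************************
Usage note (added): the body above shows how the legacy flat-parameter
interface maps onto the decision forest builder object. A hedged sketch
of the equivalent direct usage through the C++ wrappers (same names as
the calls above):

    alglib::decisionforestbuilder builder;
    alglib::decisionforest df;
    alglib::dfreport rep;
    alglib::dfbuildercreate(builder);
    alglib::dfbuildersetdataset(builder, xy, npoints, nvars, nclasses);
    alglib::dfbuildersetsubsampleratio(builder, 0.66); // was SampleSize/NPoints
    alglib::dfbuildersetrndvars(builder, 3);           // was NFeatures
    alglib::dfbuilderbuildrandomforest(builder, 100, df, rep);
*************************************************************************/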
46441 :
46442 :
46443 : /*************************************************************************
46444 : Builds a range of random trees [TreeIdx0,TreeIdx1) using the decision
46445 : forest algorithm. The tree index is used to seed the per-tree RNG.
46446 :
46447 : -- ALGLIB --
46448 : Copyright 21.05.2018 by Bochkanov Sergey
46449 : *************************************************************************/
46450 0 : static void dforest_buildrandomtree(decisionforestbuilder* s,
46451 : ae_int_t treeidx0,
46452 : ae_int_t treeidx1,
46453 : ae_state *_state)
46454 : {
46455 : ae_frame _frame_block;
46456 : ae_int_t treeidx;
46457 : ae_int_t i;
46458 : ae_int_t j;
46459 : ae_int_t npoints;
46460 : ae_int_t nvars;
46461 : ae_int_t nclasses;
46462 : hqrndstate rs;
46463 : dfworkbuf *workbuf;
46464 : ae_smart_ptr _workbuf;
46465 : dfvotebuf *votebuf;
46466 : ae_smart_ptr _votebuf;
46467 : dftreebuf *treebuf;
46468 : ae_smart_ptr _treebuf;
46469 : ae_int_t treesize;
46470 : ae_int_t varstoselect;
46471 : ae_int_t workingsetsize;
46472 : double meanloss;
46473 :
46474 0 : ae_frame_make(_state, &_frame_block);
46475 0 : memset(&rs, 0, sizeof(rs));
46476 0 : memset(&_workbuf, 0, sizeof(_workbuf));
46477 0 : memset(&_votebuf, 0, sizeof(_votebuf));
46478 0 : memset(&_treebuf, 0, sizeof(_treebuf));
46479 0 : _hqrndstate_init(&rs, _state, ae_true);
46480 0 : ae_smart_ptr_init(&_workbuf, (void**)&workbuf, _state, ae_true);
46481 0 : ae_smart_ptr_init(&_votebuf, (void**)&votebuf, _state, ae_true);
46482 0 : ae_smart_ptr_init(&_treebuf, (void**)&treebuf, _state, ae_true);
46483 :
46484 :
46485 : /*
46486 : * Perform parallelization
46487 : */
46488 0 : if( treeidx1-treeidx0>1 )
46489 : {
46490 0 : if( _trypexec_dforest_buildrandomtree(s,treeidx0,treeidx1, _state) )
46491 : {
46492 0 : ae_frame_leave(_state);
46493 0 : return;
46494 : }
46495 0 : j = (treeidx1-treeidx0)/2;
46496 0 : dforest_buildrandomtree(s, treeidx0, treeidx0+j, _state);
46497 0 : dforest_buildrandomtree(s, treeidx0+j, treeidx1, _state);
46498 0 : ae_frame_leave(_state);
46499 0 : return;
46500 : }
46501 : else
46502 : {
46503 0 : ae_assert(treeidx1-treeidx0==1, "RDF: integrity check failed", _state);
46504 0 : treeidx = treeidx0;
46505 : }
46506 :
46507 : /*
46508 : * Prepare
46509 : */
46510 0 : npoints = s->npoints;
46511 0 : nvars = s->nvars;
46512 0 : nclasses = s->nclasses;
46513 0 : if( s->rdfglobalseed>0 )
46514 : {
46515 0 : hqrndseed(s->rdfglobalseed, 1+treeidx, &rs, _state);
46516 : }
46517 : else
46518 : {
46519 0 : hqrndseed(ae_randominteger(30000, _state), 1+treeidx, &rs, _state);
46520 : }
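/*
 * NOTE (added): seeding with (RDFGlobalSeed, 1+TreeIdx) gives every tree
 * its own deterministic random stream, so for a fixed positive seed the
 * forest is reproducible regardless of the order in which worker threads
 * pick up tree indexes.
 */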
46521 :
46522 : /*
46523 : * Retrieve buffers.
46524 : */
46525 0 : ae_shared_pool_retrieve(&s->workpool, &_workbuf, _state);
46526 0 : ae_shared_pool_retrieve(&s->votepool, &_votebuf, _state);
46527 :
46528 : /*
46529 : * Prepare everything for tree construction.
46530 : */
46531 0 : ae_assert(workbuf->trnsize>=1, "DForest: integrity check failed (34636)", _state);
46532 0 : ae_assert(workbuf->oobsize>=0, "DForest: integrity check failed (45745)", _state);
46533 0 : ae_assert(workbuf->trnsize+workbuf->oobsize==npoints, "DForest: integrity check failed (89415)", _state);
46534 0 : workingsetsize = -1;
46535 0 : workbuf->varpoolsize = 0;
46536 0 : for(i=0; i<=nvars-1; i++)
46537 : {
46538 0 : if( ae_fp_neq(s->dsmin.ptr.p_double[i],s->dsmax.ptr.p_double[i]) )
46539 : {
46540 0 : workbuf->varpool.ptr.p_int[workbuf->varpoolsize] = i;
46541 0 : inc(&workbuf->varpoolsize, _state);
46542 : }
46543 : }
46544 0 : workingsetsize = workbuf->varpoolsize;
46545 0 : ae_assert(workingsetsize>=0, "DForest: integrity check failed (73f5)", _state);
46546 0 : for(i=0; i<=npoints-1; i++)
46547 : {
46548 0 : workbuf->tmp0i.ptr.p_int[i] = i;
46549 : }
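/*
 * NOTE (added): the loop below is a partial Fisher-Yates shuffle of Tmp0I;
 * after TrnSize steps its first TrnSize entries are a uniform random
 * sample without replacement (the training subset), and the remaining
 * OOBSize entries form the out-of-bag subset used below.
 */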
46550 0 : for(i=0; i<=workbuf->trnsize-1; i++)
46551 : {
46552 0 : j = hqrnduniformi(&rs, npoints-i, _state);
46553 0 : swapelementsi(&workbuf->tmp0i, i, i+j, _state);
46554 0 : workbuf->trnset.ptr.p_int[i] = workbuf->tmp0i.ptr.p_int[i];
46555 0 : if( nclasses>1 )
46556 : {
46557 0 : workbuf->trnlabelsi.ptr.p_int[i] = s->dsival.ptr.p_int[workbuf->tmp0i.ptr.p_int[i]];
46558 : }
46559 : else
46560 : {
46561 0 : workbuf->trnlabelsr.ptr.p_double[i] = s->dsrval.ptr.p_double[workbuf->tmp0i.ptr.p_int[i]];
46562 : }
46563 0 : if( s->neediobmatrix )
46564 : {
46565 0 : s->iobmatrix.ptr.pp_bool[treeidx][workbuf->trnset.ptr.p_int[i]] = ae_true;
46566 : }
46567 : }
46568 0 : for(i=0; i<=workbuf->oobsize-1; i++)
46569 : {
46570 0 : j = workbuf->tmp0i.ptr.p_int[workbuf->trnsize+i];
46571 0 : workbuf->oobset.ptr.p_int[i] = j;
46572 0 : if( nclasses>1 )
46573 : {
46574 0 : workbuf->ooblabelsi.ptr.p_int[i] = s->dsival.ptr.p_int[j];
46575 : }
46576 : else
46577 : {
46578 0 : workbuf->ooblabelsr.ptr.p_double[i] = s->dsrval.ptr.p_double[j];
46579 : }
46580 : }
46581 0 : varstoselect = ae_round(ae_sqrt((double)(nvars), _state), _state);
46582 0 : if( ae_fp_greater(s->rdfvars,(double)(0)) )
46583 : {
46584 0 : varstoselect = ae_round(s->rdfvars, _state);
46585 : }
46586 0 : if( ae_fp_less(s->rdfvars,(double)(0)) )
46587 : {
46588 0 : varstoselect = ae_round(-nvars*s->rdfvars, _state);
46589 : }
46590 0 : varstoselect = ae_maxint(varstoselect, 1, _state);
46591 0 : varstoselect = ae_minint(varstoselect, nvars, _state);
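/*
 * NOTE (added): worked example for NVars=100. RDFVars=0 selects the
 * default sqrt rule, round(sqrt(100))=10 variables per split; RDFVars=25
 * selects exactly 25; RDFVars=-0.5 selects a fraction, round(0.5*100)=50.
 * The result is always clamped to [1,NVars].
 */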
46592 :
46593 : /*
46594 : * Perform recursive construction
46595 : */
46596 0 : if( s->rdfimportance==dforest_needtrngini )
46597 : {
46598 0 : meanloss = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->tmpnrms2, _state);
46599 : }
46600 : else
46601 : {
46602 0 : meanloss = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, 0, workbuf->trnsize, &workbuf->ooblabelsi, &workbuf->ooblabelsr, 0, workbuf->oobsize, &workbuf->tmpnrms2, _state);
46603 : }
46604 0 : treesize = 1;
46605 0 : dforest_buildrandomtreerec(s, workbuf, workingsetsize, varstoselect, &workbuf->treebuf, votebuf, &rs, 0, workbuf->trnsize, 0, workbuf->oobsize, meanloss, meanloss, &treesize, _state);
46606 0 : workbuf->treebuf.ptr.p_double[0] = (double)(treesize);
46607 :
46608 : /*
46609 : * Store tree
46610 : */
46611 0 : ae_shared_pool_retrieve(&s->treefactory, &_treebuf, _state);
46612 0 : ae_vector_set_length(&treebuf->treebuf, treesize, _state);
46613 0 : for(i=0; i<=treesize-1; i++)
46614 : {
46615 0 : treebuf->treebuf.ptr.p_double[i] = workbuf->treebuf.ptr.p_double[i];
46616 : }
46617 0 : treebuf->treeidx = treeidx;
46618 0 : ae_shared_pool_recycle(&s->treepool, &_treebuf, _state);
46619 :
46620 : /*
46621 : * Return other buffers to appropriate pools
46622 : */
46623 0 : ae_shared_pool_recycle(&s->workpool, &_workbuf, _state);
46624 0 : ae_shared_pool_recycle(&s->votepool, &_votebuf, _state);
46625 :
46626 : /*
46627 : * Update progress indicator
46628 : */
46629 0 : threadunsafeincby(&s->rdfprogress, npoints, _state);
46630 0 : ae_frame_leave(_state);
46631 : }
46632 :
46633 :
46634 : /*************************************************************************
46635 : Serial stub for GPL edition.
46636 : *************************************************************************/
46637 0 : ae_bool _trypexec_dforest_buildrandomtree(decisionforestbuilder* s,
46638 : ae_int_t treeidx0,
46639 : ae_int_t treeidx1,
46640 : ae_state *_state)
46641 : {
46642 0 : return ae_false;
46643 : }
46644 :
46645 :
46646 : /*************************************************************************
46647 : Recursive tree construction function using caller-allocated buffers and
46648 : a caller-initialized RNG.
46649 :
46650 : The following items are processed:
46651 : * items [Idx0,Idx1) of WorkBuf.TrnSet
46652 : * items [OOBIdx0, OOBIdx1) of WorkBuf.OOBSet
46653 :
46654 : TreeSize on input must be 1 (the header element of the tree); on output
46655 : it contains the size of the tree.
46656 :
46657 : MeanLoss and TopMostMeanLoss on the initial call must contain the value
46658 : of MeanNRMS2(...) computed for the entire dataset.
46659 :
46660 : Variables #0 to #WorkingSet-1 of WorkBuf.VarPool are used (for the block
46661 : algorithm: blocks, not vars).
46662 :
46663 : -- ALGLIB --
46664 : Copyright 21.05.2018 by Bochkanov Sergey
46665 : *************************************************************************/
46666 0 : static void dforest_buildrandomtreerec(decisionforestbuilder* s,
46667 : dfworkbuf* workbuf,
46668 : ae_int_t workingset,
46669 : ae_int_t varstoselect,
46670 : /* Real */ ae_vector* treebuf,
46671 : dfvotebuf* votebuf,
46672 : hqrndstate* rs,
46673 : ae_int_t idx0,
46674 : ae_int_t idx1,
46675 : ae_int_t oobidx0,
46676 : ae_int_t oobidx1,
46677 : double meanloss,
46678 : double topmostmeanloss,
46679 : ae_int_t* treesize,
46680 : ae_state *_state)
46681 : {
46682 : ae_int_t npoints;
46683 : ae_int_t nclasses;
46684 : ae_int_t i;
46685 : ae_int_t j;
46686 : ae_int_t j0;
46687 : double v;
46688 : ae_bool labelsaresame;
46689 : ae_int_t offs;
46690 : ae_int_t varbest;
46691 : double splitbest;
46692 : ae_int_t i1;
46693 : ae_int_t i2;
46694 : ae_int_t idxtrn;
46695 : ae_int_t idxoob;
46696 : double meanloss0;
46697 : double meanloss1;
46698 :
46699 :
46700 0 : ae_assert(s->dstype==0, "not supported skbdgfsi!", _state);
46701 0 : ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (3445)", _state);
46702 0 : ae_assert(oobidx0<=oobidx1, "BuildRandomTreeRec: integrity check failed (7452)", _state);
46703 0 : npoints = s->npoints;
46704 0 : nclasses = s->nclasses;
46705 :
46706 : /*
46707 : * Check labels: all same or not?
46708 : */
46709 0 : if( nclasses>1 )
46710 : {
46711 0 : labelsaresame = ae_true;
46712 0 : for(i=0; i<=nclasses-1; i++)
46713 : {
46714 0 : workbuf->classpriors.ptr.p_int[i] = 0;
46715 : }
46716 0 : j0 = workbuf->trnlabelsi.ptr.p_int[idx0];
46717 0 : for(i=idx0; i<=idx1-1; i++)
46718 : {
46719 0 : j = workbuf->trnlabelsi.ptr.p_int[i];
46720 0 : workbuf->classpriors.ptr.p_int[j] = workbuf->classpriors.ptr.p_int[j]+1;
46721 0 : labelsaresame = labelsaresame&&j0==j;
46722 : }
46723 : }
46724 : else
46725 : {
46726 0 : labelsaresame = ae_false;
46727 : }
46728 :
46729 : /*
46730 : * Leaf node
46731 : */
46732 0 : if( idx1-idx0==1||labelsaresame )
46733 : {
46734 0 : if( nclasses==1 )
46735 : {
46736 0 : dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, workbuf->trnlabelsr.ptr.p_double[idx0], _state);
46737 : }
46738 : else
46739 : {
46740 0 : dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, (double)(workbuf->trnlabelsi.ptr.p_int[idx0]), _state);
46741 : }
46742 0 : return;
46743 : }
46744 :
46745 : /*
46746 : * Non-leaf node.
46747 : * Investigate possible splits.
46748 : */
46749 0 : ae_assert(s->rdfalgo==0, "BuildRandomForest: unexpected algo", _state);
46750 0 : dforest_choosecurrentsplitdense(s, workbuf, &workingset, varstoselect, rs, idx0, idx1, &varbest, &splitbest, _state);
46751 0 : if( varbest<0 )
46752 : {
46753 :
46754 : /*
46755 : * No good split was found; make a leaf (its label is chosen at random) and exit.
46756 : */
46757 0 : if( nclasses>1 )
46758 : {
46759 0 : v = (double)(workbuf->trnlabelsi.ptr.p_int[idx0+hqrnduniformi(rs, idx1-idx0, _state)]);
46760 : }
46761 : else
46762 : {
46763 0 : v = workbuf->trnlabelsr.ptr.p_double[idx0+hqrnduniformi(rs, idx1-idx0, _state)];
46764 : }
46765 0 : dforest_outputleaf(s, workbuf, treebuf, votebuf, idx0, idx1, oobidx0, oobidx1, treesize, v, _state);
46766 0 : return;
46767 : }
46768 :
46769 : /*
46770 : * A good split WAS found, so we can perform it:
46771 : * * first, we split the training set
46772 : * * then, we similarly split the OOB set
46773 : */
46774 0 : ae_assert(s->dstype==0, "not supported 54bfdh", _state);
46775 0 : offs = npoints*varbest;
46776 0 : i1 = idx0;
46777 0 : i2 = idx1-1;
46778 0 : while(i1<=i2)
46779 : {
46780 :
46781 : /*
46782 : * Reorder indexes so that left partition is in [Idx0..I1),
46783 : * and right partition is in [I2+1..Idx1)
46784 : */
46785 0 : if( workbuf->bestvals.ptr.p_double[i1]<splitbest )
46786 : {
46787 0 : i1 = i1+1;
46788 0 : continue;
46789 : }
46790 0 : if( workbuf->bestvals.ptr.p_double[i2]>=splitbest )
46791 : {
46792 0 : i2 = i2-1;
46793 0 : continue;
46794 : }
46795 0 : j = workbuf->trnset.ptr.p_int[i1];
46796 0 : workbuf->trnset.ptr.p_int[i1] = workbuf->trnset.ptr.p_int[i2];
46797 0 : workbuf->trnset.ptr.p_int[i2] = j;
46798 0 : if( nclasses>1 )
46799 : {
46800 0 : j = workbuf->trnlabelsi.ptr.p_int[i1];
46801 0 : workbuf->trnlabelsi.ptr.p_int[i1] = workbuf->trnlabelsi.ptr.p_int[i2];
46802 0 : workbuf->trnlabelsi.ptr.p_int[i2] = j;
46803 : }
46804 : else
46805 : {
46806 0 : v = workbuf->trnlabelsr.ptr.p_double[i1];
46807 0 : workbuf->trnlabelsr.ptr.p_double[i1] = workbuf->trnlabelsr.ptr.p_double[i2];
46808 0 : workbuf->trnlabelsr.ptr.p_double[i2] = v;
46809 : }
46810 0 : i1 = i1+1;
46811 0 : i2 = i2-1;
46812 : }
46813 0 : ae_assert(i1==i2+1, "BuildRandomTreeRec: integrity check failed (45rds3)", _state);
46814 0 : idxtrn = i1;
46815 0 : if( oobidx0<oobidx1 )
46816 : {
46817 :
46818 : /*
46819 : * Unlike the training subset, the out-of-bag subset corresponding to the
46820 : * current sequence of decisions can be empty; thus, we have to explicitly
46821 : * handle the situation of an empty OOB subset.
46822 : */
46823 0 : i1 = oobidx0;
46824 0 : i2 = oobidx1-1;
46825 0 : while(i1<=i2)
46826 : {
46827 :
46828 : /*
46829 : * Reorder indexes so that left partition is in [Idx0..I1),
46830 : * and right partition is in [I2+1..Idx1)
46831 : */
46832 0 : if( s->dsdata.ptr.p_double[offs+workbuf->oobset.ptr.p_int[i1]]<splitbest )
46833 : {
46834 0 : i1 = i1+1;
46835 0 : continue;
46836 : }
46837 0 : if( s->dsdata.ptr.p_double[offs+workbuf->oobset.ptr.p_int[i2]]>=splitbest )
46838 : {
46839 0 : i2 = i2-1;
46840 0 : continue;
46841 : }
46842 0 : j = workbuf->oobset.ptr.p_int[i1];
46843 0 : workbuf->oobset.ptr.p_int[i1] = workbuf->oobset.ptr.p_int[i2];
46844 0 : workbuf->oobset.ptr.p_int[i2] = j;
46845 0 : if( nclasses>1 )
46846 : {
46847 0 : j = workbuf->ooblabelsi.ptr.p_int[i1];
46848 0 : workbuf->ooblabelsi.ptr.p_int[i1] = workbuf->ooblabelsi.ptr.p_int[i2];
46849 0 : workbuf->ooblabelsi.ptr.p_int[i2] = j;
46850 : }
46851 : else
46852 : {
46853 0 : v = workbuf->ooblabelsr.ptr.p_double[i1];
46854 0 : workbuf->ooblabelsr.ptr.p_double[i1] = workbuf->ooblabelsr.ptr.p_double[i2];
46855 0 : workbuf->ooblabelsr.ptr.p_double[i2] = v;
46856 : }
46857 0 : i1 = i1+1;
46858 0 : i2 = i2-1;
46859 : }
46860 0 : ae_assert(i1==i2+1, "BuildRandomTreeRec: integrity check failed (643fs3)", _state);
46861 0 : idxoob = i1;
46862 : }
46863 : else
46864 : {
46865 0 : idxoob = oobidx0;
46866 : }
46867 :
46868 : /*
46869 : * Compute estimates of NRMS2 loss over TRN or OOB subsets, update Gini importances
46870 : */
46871 0 : if( s->rdfimportance==dforest_needtrngini )
46872 : {
46873 0 : meanloss0 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->tmpnrms2, _state);
46874 0 : meanloss1 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->tmpnrms2, _state);
46875 : }
46876 : else
46877 : {
46878 0 : meanloss0 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idx0, idxtrn, &workbuf->ooblabelsi, &workbuf->ooblabelsr, oobidx0, idxoob, &workbuf->tmpnrms2, _state);
46879 0 : meanloss1 = dforest_meannrms2(nclasses, &workbuf->trnlabelsi, &workbuf->trnlabelsr, idxtrn, idx1, &workbuf->ooblabelsi, &workbuf->ooblabelsr, idxoob, oobidx1, &workbuf->tmpnrms2, _state);
46880 : }
46881 0 : votebuf->giniimportances.ptr.p_double[varbest] = votebuf->giniimportances.ptr.p_double[varbest]+(meanloss-(meanloss0+meanloss1))/(topmostmeanloss+1.0e-20);
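/*
 * NOTE (added): each split contributes its normalized impurity decrease,
 * (MeanLoss-(MeanLoss0+MeanLoss1))/(TopMostMeanLoss+1.0e-20), where the
 * losses are MeanNRMS2 values and 1.0e-20 guards against division by
 * zero; the accumulated sum is divided by the tree count later, when
 * per-tree votes are merged into the report.
 */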
46882 :
46883 : /*
46884 : * Generate tree node and subtrees (recursively)
46885 : */
46886 0 : treebuf->ptr.p_double[*treesize] = (double)(varbest);
46887 0 : treebuf->ptr.p_double[*treesize+1] = splitbest;
46888 0 : i = *treesize;
46889 0 : *treesize = *treesize+dforest_innernodewidth;
46890 0 : dforest_buildrandomtreerec(s, workbuf, workingset, varstoselect, treebuf, votebuf, rs, idx0, idxtrn, oobidx0, idxoob, meanloss0, topmostmeanloss, treesize, _state);
46891 0 : treebuf->ptr.p_double[i+2] = (double)(*treesize);
46892 0 : dforest_buildrandomtreerec(s, workbuf, workingset, varstoselect, treebuf, votebuf, rs, idxtrn, idx1, idxoob, oobidx1, meanloss1, topmostmeanloss, treesize, _state);
46893 : }
46894 :
46895 :
46896 : /*************************************************************************
46897 : Estimates variable importance ratings and stores them into
46898 : Rep.VarImportances and Rep.TopVars. Depending on the importance type
46899 : requested at build time, this function either merges Gini importances
46900 : accumulated during tree generation or evaluates permutation importances
46901 : over the entire dataset.
46903 :
46904 : -- ALGLIB --
46905 : Copyright 21.05.2018 by Bochkanov Sergey
46906 : *************************************************************************/
46907 0 : static void dforest_estimatevariableimportance(decisionforestbuilder* s,
46908 : ae_int_t sessionseed,
46909 : decisionforest* df,
46910 : ae_int_t ntrees,
46911 : dfreport* rep,
46912 : ae_state *_state)
46913 : {
46914 : ae_frame _frame_block;
46915 : ae_int_t npoints;
46916 : ae_int_t nvars;
46917 : ae_int_t nclasses;
46918 : ae_int_t nperm;
46919 : ae_int_t i;
46920 : ae_int_t j;
46921 : ae_int_t k;
46922 : dfvotebuf *vote;
46923 : ae_smart_ptr _vote;
46924 : ae_vector tmpr0;
46925 : ae_vector tmpr1;
46926 : ae_vector tmpi0;
46927 : ae_vector losses;
46928 : dfpermimpbuf permseed;
46929 : dfpermimpbuf *permresult;
46930 : ae_smart_ptr _permresult;
46931 : ae_shared_pool permpool;
46932 : double nopermloss;
46933 : double totalpermloss;
46934 : hqrndstate varimprs;
46935 :
46936 0 : ae_frame_make(_state, &_frame_block);
46937 0 : memset(&_vote, 0, sizeof(_vote));
46938 0 : memset(&tmpr0, 0, sizeof(tmpr0));
46939 0 : memset(&tmpr1, 0, sizeof(tmpr1));
46940 0 : memset(&tmpi0, 0, sizeof(tmpi0));
46941 0 : memset(&losses, 0, sizeof(losses));
46942 0 : memset(&permseed, 0, sizeof(permseed));
46943 0 : memset(&_permresult, 0, sizeof(_permresult));
46944 0 : memset(&permpool, 0, sizeof(permpool));
46945 0 : memset(&varimprs, 0, sizeof(varimprs));
46946 0 : ae_smart_ptr_init(&_vote, (void**)&vote, _state, ae_true);
46947 0 : ae_vector_init(&tmpr0, 0, DT_REAL, _state, ae_true);
46948 0 : ae_vector_init(&tmpr1, 0, DT_REAL, _state, ae_true);
46949 0 : ae_vector_init(&tmpi0, 0, DT_INT, _state, ae_true);
46950 0 : ae_vector_init(&losses, 0, DT_REAL, _state, ae_true);
46951 0 : _dfpermimpbuf_init(&permseed, _state, ae_true);
46952 0 : ae_smart_ptr_init(&_permresult, (void**)&permresult, _state, ae_true);
46953 0 : ae_shared_pool_init(&permpool, _state, ae_true);
46954 0 : _hqrndstate_init(&varimprs, _state, ae_true);
46955 :
46956 0 : npoints = s->npoints;
46957 0 : nvars = s->nvars;
46958 0 : nclasses = s->nclasses;
46959 :
46960 : /*
46961 : * No importance rating
46962 : */
46963 0 : if( s->rdfimportance==0 )
46964 : {
46965 0 : ae_frame_leave(_state);
46966 0 : return;
46967 : }
46968 :
46969 : /*
46970 : * Gini importance
46971 : */
46972 0 : if( s->rdfimportance==dforest_needtrngini||s->rdfimportance==dforest_needoobgini )
46973 : {
46974 :
46975 : /*
46976 : * Merge Gini importances (TRN or OOB, depending on settings) computed during tree generation
46977 : */
46978 0 : ae_shared_pool_first_recycled(&s->votepool, &_vote, _state);
46979 0 : while(vote!=NULL)
46980 : {
46981 0 : for(i=0; i<=nvars-1; i++)
46982 : {
46983 0 : rep->varimportances.ptr.p_double[i] = rep->varimportances.ptr.p_double[i]+vote->giniimportances.ptr.p_double[i]/ntrees;
46984 : }
46985 0 : ae_shared_pool_next_recycled(&s->votepool, &_vote, _state);
46986 : }
46987 0 : for(i=0; i<=nvars-1; i++)
46988 : {
46989 0 : rep->varimportances.ptr.p_double[i] = boundval(rep->varimportances.ptr.p_double[i], (double)(0), (double)(1), _state);
46990 : }
46991 :
46992 : /*
46993 : * Compute topvars[] array
46994 : */
46995 0 : ae_vector_set_length(&tmpr0, nvars, _state);
46996 0 : for(j=0; j<=nvars-1; j++)
46997 : {
46998 0 : tmpr0.ptr.p_double[j] = -rep->varimportances.ptr.p_double[j];
46999 0 : rep->topvars.ptr.p_int[j] = j;
47000 : }
47001 0 : tagsortfasti(&tmpr0, &rep->topvars, &tmpr1, &tmpi0, nvars, _state);
47002 0 : ae_frame_leave(_state);
47003 0 : return;
47004 : }
47005 :
47006 : /*
47007 : * Permutation importance
47008 : */
47009 0 : if( s->rdfimportance==dforest_needpermutation )
47010 : {
47011 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
47012 0 : ae_assert(s->iobmatrix.rows>=ntrees&&s->iobmatrix.cols>=npoints, "EstimateVariableImportance: integrity check failed (IOB)", _state);
47013 :
47014 : /*
47015 : * Generate packed representation of the shuffle which is applied to all variables
47016 : *
47017 : * Ideally we want to apply different permutations to different variables,
47018 : * i.e. we have to generate and store NPoints*NVars random numbers.
47019 : * However, due to performance and memory restrictions, we prefer a compact
47020 : * representation:
47021 : * * we store one "reference" permutation P_ref in VarImpShuffle2[0:NPoints-1]
47022 : * * a permutation P_j applied to variable J is obtained by circularly shifting
47023 : * elements in P_ref by VarImpShuffle2[NPoints+J]
47024 : */
47025 0 : hqrndseed(sessionseed, 1117, &varimprs, _state);
47026 0 : ivectorsetlengthatleast(&s->varimpshuffle2, npoints+nvars, _state);
47027 0 : for(i=0; i<=npoints-1; i++)
47028 : {
47029 0 : s->varimpshuffle2.ptr.p_int[i] = i;
47030 : }
47031 0 : for(i=0; i<=npoints-2; i++)
47032 : {
47033 0 : j = i+hqrnduniformi(&varimprs, npoints-i, _state);
47034 0 : k = s->varimpshuffle2.ptr.p_int[i];
47035 0 : s->varimpshuffle2.ptr.p_int[i] = s->varimpshuffle2.ptr.p_int[j];
47036 0 : s->varimpshuffle2.ptr.p_int[j] = k;
47037 : }
47038 0 : for(i=0; i<=nvars-1; i++)
47039 : {
47040 0 : s->varimpshuffle2.ptr.p_int[npoints+i] = hqrnduniformi(&varimprs, npoints, _state);
47041 : }
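/*
 * NOTE (added): with this representation the permutation applied to
 * variable J is P_j(i) = P_ref[(i+Shift_j) mod NPoints], where P_ref is
 * VarImpShuffle2[0:NPoints-1] and Shift_j is VarImpShuffle2[NPoints+J];
 * the evaluation code recovers SrcIdx with exactly this formula.
 */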
47042 :
47043 : /*
47044 : * Prepare buffer object, seed pool
47045 : */
47046 0 : nperm = nvars+2;
47047 0 : ae_vector_set_length(&permseed.losses, nperm, _state);
47048 0 : for(j=0; j<=nperm-1; j++)
47049 : {
47050 0 : permseed.losses.ptr.p_double[j] = (double)(0);
47051 : }
47052 0 : ae_vector_set_length(&permseed.yv, nperm*nclasses, _state);
47053 0 : ae_vector_set_length(&permseed.xraw, nvars, _state);
47054 0 : ae_vector_set_length(&permseed.xdist, nvars, _state);
47055 0 : ae_vector_set_length(&permseed.xcur, nvars, _state);
47056 0 : ae_vector_set_length(&permseed.targety, nclasses, _state);
47057 0 : ae_vector_set_length(&permseed.startnodes, nvars, _state);
47058 0 : ae_vector_set_length(&permseed.y, nclasses, _state);
47059 0 : ae_shared_pool_set_seed(&permpool, &permseed, sizeof(permseed), _dfpermimpbuf_init, _dfpermimpbuf_init_copy, _dfpermimpbuf_destroy, _state);
47060 :
47061 : /*
47062 : * Recursively split subset and process (using parallel capabilities, if possible)
47063 : */
47064 0 : dforest_estimatepermutationimportances(s, df, ntrees, &permpool, 0, npoints, _state);
47065 :
47066 : /*
47067 : * Merge results
47068 : */
47069 0 : ae_vector_set_length(&losses, nperm, _state);
47070 0 : for(j=0; j<=nperm-1; j++)
47071 : {
47072 0 : losses.ptr.p_double[j] = 1.0e-20;
47073 : }
47074 0 : ae_shared_pool_first_recycled(&permpool, &_permresult, _state);
47075 0 : while(permresult!=NULL)
47076 : {
47077 0 : for(j=0; j<=nperm-1; j++)
47078 : {
47079 0 : losses.ptr.p_double[j] = losses.ptr.p_double[j]+permresult->losses.ptr.p_double[j];
47080 : }
47081 0 : ae_shared_pool_next_recycled(&permpool, &_permresult, _state);
47082 : }
47083 :
47084 : /*
47085 : * Compute importances
47086 : */
47087 0 : nopermloss = losses.ptr.p_double[nvars+1];
47088 0 : totalpermloss = losses.ptr.p_double[nvars];
47089 0 : for(i=0; i<=nvars-1; i++)
47090 : {
47091 0 : rep->varimportances.ptr.p_double[i] = 1-nopermloss/totalpermloss-(1-losses.ptr.p_double[i]/totalpermloss);
47092 0 : rep->varimportances.ptr.p_double[i] = boundval(rep->varimportances.ptr.p_double[i], (double)(0), (double)(1), _state);
47093 : }
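/*
 * NOTE (added): the expression above simplifies to
 * (Losses[I]-NoPermLoss)/TotalPermLoss, i.e. the excess loss caused by
 * permuting variable I alone, measured relative to the loss with ALL
 * variables permuted, and then clamped to [0,1].
 */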
47094 :
47095 : /*
47096 : * Compute topvars[] array
47097 : */
47098 0 : ae_vector_set_length(&tmpr0, nvars, _state);
47099 0 : for(j=0; j<=nvars-1; j++)
47100 : {
47101 0 : tmpr0.ptr.p_double[j] = -rep->varimportances.ptr.p_double[j];
47102 0 : rep->topvars.ptr.p_int[j] = j;
47103 : }
47104 0 : tagsortfasti(&tmpr0, &rep->topvars, &tmpr1, &tmpi0, nvars, _state);
47105 0 : ae_frame_leave(_state);
47106 0 : return;
47107 : }
47108 0 : ae_assert(ae_false, "EstimateVariableImportance: unexpected importance type", _state);
47109 0 : ae_frame_leave(_state);
47110 : }
47111 :
47112 :
47113 : /*************************************************************************
47114 : Serial stub for GPL edition.
47115 : *************************************************************************/
47116 0 : ae_bool _trypexec_dforest_estimatevariableimportance(decisionforestbuilder* s,
47117 : ae_int_t sessionseed,
47118 : decisionforest* df,
47119 : ae_int_t ntrees,
47120 : dfreport* rep,
47121 : ae_state *_state)
47122 : {
47123 0 : return ae_false;
47124 : }
47125 :
47126 :
47127 : /*************************************************************************
47128 : Estimates permutation variable importance ratings for a range of dataset
47129 : points.
47130 :
47131 : The initial call to this function should span the entire range of the dataset,
47132 : [Idx0,Idx1)=[0,NPoints), because the function performs initialization of some
47133 : internal structures when called with these arguments.
47134 :
47135 : -- ALGLIB --
47136 : Copyright 21.05.2018 by Bochkanov Sergey
47137 : *************************************************************************/
47138 0 : static void dforest_estimatepermutationimportances(decisionforestbuilder* s,
47139 : decisionforest* df,
47140 : ae_int_t ntrees,
47141 : ae_shared_pool* permpool,
47142 : ae_int_t idx0,
47143 : ae_int_t idx1,
47144 : ae_state *_state)
47145 : {
47146 : ae_frame _frame_block;
47147 : ae_int_t npoints;
47148 : ae_int_t nvars;
47149 : ae_int_t nclasses;
47150 : ae_int_t nperm;
47151 : ae_int_t i;
47152 : ae_int_t j;
47153 : ae_int_t k;
47154 : double v;
47155 : ae_int_t treeroot;
47156 : ae_int_t nodeoffs;
47157 : double prediction;
47158 : ae_int_t varidx;
47159 : ae_int_t oobcounts;
47160 : ae_int_t srcidx;
47161 : dfpermimpbuf *permimpbuf;
47162 : ae_smart_ptr _permimpbuf;
47163 :
47164 0 : ae_frame_make(_state, &_frame_block);
47165 0 : memset(&_permimpbuf, 0, sizeof(_permimpbuf));
47166 0 : ae_smart_ptr_init(&_permimpbuf, (void**)&permimpbuf, _state, ae_true);
47167 :
47168 0 : npoints = s->npoints;
47169 0 : nvars = s->nvars;
47170 0 : nclasses = s->nclasses;
47171 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
47172 0 : ae_assert((idx0>=0&&idx0<=idx1)&&idx1<=npoints, "EstimateVariableImportance: integrity check failed (idx)", _state);
47173 0 : ae_assert(s->iobmatrix.rows>=ntrees&&s->iobmatrix.cols>=npoints, "EstimateVariableImportance: integrity check failed (IOB)", _state);
47174 :
47175 : /*
47176 : * Perform parallelization if the batch is too large
47177 : */
47178 0 : if( idx1-idx0>dforest_permutationimportancebatchsize )
47179 : {
47180 0 : if( _trypexec_dforest_estimatepermutationimportances(s,df,ntrees,permpool,idx0,idx1, _state) )
47181 : {
47182 0 : ae_frame_leave(_state);
47183 0 : return;
47184 : }
47185 0 : j = (idx1-idx0)/2;
47186 0 : dforest_estimatepermutationimportances(s, df, ntrees, permpool, idx0, idx0+j, _state);
47187 0 : dforest_estimatepermutationimportances(s, df, ntrees, permpool, idx0+j, idx1, _state);
47188 0 : ae_frame_leave(_state);
47189 0 : return;
47190 : }
47191 :
47192 : /*
47193 : * Retrieve buffer object from pool
47194 : */
47195 0 : ae_shared_pool_retrieve(permpool, &_permimpbuf, _state);
47196 :
47197 : /*
47198 : * Process range of points [idx0,idx1)
47199 : */
47200 0 : nperm = nvars+2;
47201 0 : for(i=idx0; i<=idx1-1; i++)
47202 : {
47203 0 : ae_assert(s->dstype==0, "EstimateVariableImportance: unexpected dataset type", _state);
47204 0 : for(j=0; j<=nvars-1; j++)
47205 : {
47206 0 : permimpbuf->xraw.ptr.p_double[j] = s->dsdata.ptr.p_double[j*npoints+i];
47207 0 : srcidx = s->varimpshuffle2.ptr.p_int[(i+s->varimpshuffle2.ptr.p_int[npoints+j])%npoints];
47208 0 : permimpbuf->xdist.ptr.p_double[j] = s->dsdata.ptr.p_double[j*npoints+srcidx];
47209 : }
47210 0 : if( nclasses>1 )
47211 : {
47212 0 : for(j=0; j<=nclasses-1; j++)
47213 : {
47214 0 : permimpbuf->targety.ptr.p_double[j] = (double)(0);
47215 : }
47216 0 : permimpbuf->targety.ptr.p_double[s->dsival.ptr.p_int[i]] = (double)(1);
47217 : }
47218 : else
47219 : {
47220 0 : permimpbuf->targety.ptr.p_double[0] = s->dsrval.ptr.p_double[i];
47221 : }
47222 :
47223 : /*
47224 : * Process all trees, for each tree compute NPerm losses corresponding
47225 : * to various permutations of variable values
47226 : */
47227 0 : for(j=0; j<=nperm*nclasses-1; j++)
47228 : {
47229 0 : permimpbuf->yv.ptr.p_double[j] = (double)(0);
47230 : }
47231 0 : oobcounts = 0;
47232 0 : treeroot = 0;
47233 0 : for(k=0; k<=ntrees-1; k++)
47234 : {
47235 0 : if( !s->iobmatrix.ptr.pp_bool[k][i] )
47236 : {
47237 :
47238 : /*
47239 : * Process the original (unperturbed) point and analyze the path from
47240 : * the tree root to the final leaf, storing the result in Prediction.
47241 : *
47242 : * Additionally, for each variable in [0,NVars-1] save the offset of
47243 : * the first split on this variable. This allows us to quickly compute
47244 : * the tree decision when a perturbation does not change the decision path.
47245 : */
47246 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
47247 0 : nodeoffs = treeroot+1;
47248 0 : for(j=0; j<=nvars-1; j++)
47249 : {
47250 0 : permimpbuf->startnodes.ptr.p_int[j] = -1;
47251 : }
47252 0 : prediction = (double)(0);
47253 : for(;;)
47254 : {
47255 0 : if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
47256 : {
47257 0 : prediction = df->trees.ptr.p_double[nodeoffs+1];
47258 0 : break;
47259 : }
47260 0 : j = ae_round(df->trees.ptr.p_double[nodeoffs], _state);
47261 0 : if( permimpbuf->startnodes.ptr.p_int[j]<0 )
47262 : {
47263 0 : permimpbuf->startnodes.ptr.p_int[j] = nodeoffs;
47264 : }
47265 0 : if( permimpbuf->xraw.ptr.p_double[j]<df->trees.ptr.p_double[nodeoffs+1] )
47266 : {
47267 0 : nodeoffs = nodeoffs+dforest_innernodewidth;
47268 : }
47269 : else
47270 : {
47271 0 : nodeoffs = treeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
47272 : }
47273 : }
47274 :
47275 : /*
47276 : * Save loss for unperturbed point
47277 : */
47278 0 : varidx = nvars+1;
47279 0 : if( nclasses>1 )
47280 : {
47281 0 : j = ae_round(prediction, _state);
47282 0 : permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
47283 : }
47284 : else
47285 : {
47286 0 : permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+prediction;
47287 : }
47288 :
47289 : /*
47290 : * Save loss for all variables being perturbed (XDist).
47291 : * This loss is used as a reference loss when we compute R-squared.
47292 : */
47293 0 : varidx = nvars;
47294 0 : for(j=0; j<=nclasses-1; j++)
47295 : {
47296 0 : permimpbuf->y.ptr.p_double[j] = (double)(0);
47297 : }
47298 0 : dforest_dfprocessinternaluncompressed(df, treeroot, treeroot+1, &permimpbuf->xdist, &permimpbuf->y, _state);
47299 0 : for(j=0; j<=nclasses-1; j++)
47300 : {
47301 0 : permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+permimpbuf->y.ptr.p_double[j];
47302 : }
47303 :
47304 : /*
47305 : * Compute losses for variable #VarIdx being perturbed. Quite often the decision
47306 : * process does not actually depend on variable #VarIdx (the path from the tree
47307 : * root does not include splits on this variable). In such cases we take a
47308 : * quick exit from the loop, reusing the precomputed value.
47309 : */
47310 0 : for(j=0; j<=nvars-1; j++)
47311 : {
47312 0 : permimpbuf->xcur.ptr.p_double[j] = permimpbuf->xraw.ptr.p_double[j];
47313 : }
47314 0 : for(varidx=0; varidx<=nvars-1; varidx++)
47315 : {
47316 0 : if( permimpbuf->startnodes.ptr.p_int[varidx]>=0 )
47317 : {
47318 :
47319 : /*
47320 : * The path from the tree root to the final leaf involves a split on variable #VarIdx.
47321 : * Restart the computation from the position of the first split on #VarIdx.
47322 : */
47323 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "EstimateVariableImportance: integrity check failed (ff)", _state);
47324 0 : permimpbuf->xcur.ptr.p_double[varidx] = permimpbuf->xdist.ptr.p_double[varidx];
47325 0 : nodeoffs = permimpbuf->startnodes.ptr.p_int[varidx];
47326 : for(;;)
47327 : {
47328 0 : if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
47329 : {
47330 0 : if( nclasses>1 )
47331 : {
47332 0 : j = ae_round(df->trees.ptr.p_double[nodeoffs+1], _state);
47333 0 : permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
47334 : }
47335 : else
47336 : {
47337 0 : permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+df->trees.ptr.p_double[nodeoffs+1];
47338 : }
47339 0 : break;
47340 : }
47341 0 : j = ae_round(df->trees.ptr.p_double[nodeoffs], _state);
47342 0 : if( permimpbuf->xcur.ptr.p_double[j]<df->trees.ptr.p_double[nodeoffs+1] )
47343 : {
47344 0 : nodeoffs = nodeoffs+dforest_innernodewidth;
47345 : }
47346 : else
47347 : {
47348 0 : nodeoffs = treeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
47349 : }
47350 : }
47351 0 : permimpbuf->xcur.ptr.p_double[varidx] = permimpbuf->xraw.ptr.p_double[varidx];
47352 : }
47353 : else
47354 : {
47355 :
47356 : /*
47357 : * The path from the tree root to the final leaf does NOT involve a split on variable #VarIdx.
47358 : * The permutation does not change the tree output, so we reuse the already computed value.
47359 : */
47360 0 : if( nclasses>1 )
47361 : {
47362 0 : j = ae_round(prediction, _state);
47363 0 : permimpbuf->yv.ptr.p_double[varidx*nclasses+j] = permimpbuf->yv.ptr.p_double[varidx*nclasses+j]+1;
47364 : }
47365 : else
47366 : {
47367 0 : permimpbuf->yv.ptr.p_double[varidx] = permimpbuf->yv.ptr.p_double[varidx]+prediction;
47368 : }
47369 : }
47370 : }
47371 :
47372 : /*
47373 : * update OOB counter
47374 : */
47375 0 : inc(&oobcounts, _state);
47376 : }
47377 0 : treeroot = treeroot+ae_round(df->trees.ptr.p_double[treeroot], _state);
47378 : }
47379 :
47380 : /*
47381 : * Now YV[] stores NPerm versions of the forest output for various permutations of variable values.
47382 : * Update losses.
47383 : */
47384 0 : for(j=0; j<=nperm-1; j++)
47385 : {
47386 0 : for(k=0; k<=nclasses-1; k++)
47387 : {
47388 0 : permimpbuf->yv.ptr.p_double[j*nclasses+k] = permimpbuf->yv.ptr.p_double[j*nclasses+k]/coalesce((double)(oobcounts), (double)(1), _state);
47389 : }
47390 0 : v = (double)(0);
47391 0 : for(k=0; k<=nclasses-1; k++)
47392 : {
47393 0 : v = v+ae_sqr(permimpbuf->yv.ptr.p_double[j*nclasses+k]-permimpbuf->targety.ptr.p_double[k], _state);
47394 : }
47395 0 : permimpbuf->losses.ptr.p_double[j] = permimpbuf->losses.ptr.p_double[j]+v;
47396 : }
47397 :
47398 : /*
47399 : * Update progress indicator
47400 : */
47401 0 : threadunsafeincby(&s->rdfprogress, ntrees, _state);
47402 : }
47403 :
47404 : /*
47405 : * Recycle buffer object with updated Losses[] field
47406 : */
47407 0 : ae_shared_pool_recycle(permpool, &_permimpbuf, _state);
47408 0 : ae_frame_leave(_state);
47409 : }
47410 :
47411 :
47412 : /*************************************************************************
47413 : Serial stub for GPL edition.
47414 : *************************************************************************/
47415 0 : ae_bool _trypexec_dforest_estimatepermutationimportances(decisionforestbuilder* s,
47416 : decisionforest* df,
47417 : ae_int_t ntrees,
47418 : ae_shared_pool* permpool,
47419 : ae_int_t idx0,
47420 : ae_int_t idx1,
47421 : ae_state *_state)
47422 : {
47423 0 : return ae_false;
47424 : }
47425 :
47426 :
47427 : /*************************************************************************
47428 : Sets report fields to their default values
47429 :
47430 : -- ALGLIB --
47431 : Copyright 21.05.2018 by Bochkanov Sergey
47432 : *************************************************************************/
47433 0 : static void dforest_cleanreport(decisionforestbuilder* s,
47434 : dfreport* rep,
47435 : ae_state *_state)
47436 : {
47437 : ae_int_t i;
47438 :
47439 :
47440 0 : rep->relclserror = (double)(0);
47441 0 : rep->avgce = (double)(0);
47442 0 : rep->rmserror = (double)(0);
47443 0 : rep->avgerror = (double)(0);
47444 0 : rep->avgrelerror = (double)(0);
47445 0 : rep->oobrelclserror = (double)(0);
47446 0 : rep->oobavgce = (double)(0);
47447 0 : rep->oobrmserror = (double)(0);
47448 0 : rep->oobavgerror = (double)(0);
47449 0 : rep->oobavgrelerror = (double)(0);
47450 0 : ae_vector_set_length(&rep->topvars, s->nvars, _state);
47451 0 : ae_vector_set_length(&rep->varimportances, s->nvars, _state);
47452 0 : for(i=0; i<=s->nvars-1; i++)
47453 : {
47454 0 : rep->topvars.ptr.p_int[i] = i;
47455 0 : rep->varimportances.ptr.p_double[i] = (double)(0);
47456 : }
47457 0 : }
47458 :
47459 :
47460 : /*************************************************************************
47461 : This function returns NRMS2 loss (sum of squared residuals) for a constant-
47462 : output model:
47463 : * model output is the mean over the TRN set being passed (for classification
47464 : problems - an NClasses-dimensional vector of class probabilities)
47465 : * the model is evaluated over the TST set being passed, and the L2 loss is returned
47466 :
47467 : Input parameters:
47468 : NClasses - ">1" for classification, "=1" for regression
47469 : TrnLabelsI - training set labels, class indexes (for NClasses>1)
47470 : TrnLabelsR - training set output values (for NClasses=1)
47471 : TrnIdx0, TrnIdx1 - a range [TrnIdx0,TrnIdx1) of elements in TrnLabelsI/R is considered
47472 : TstLabelsI - test set labels, class indexes (for NClasses>1)
47473 : TstLabelsR - test set output values (for NClasses=1)
47474 : TstIdx0, TstIdx1 - a range [TstIdx0,TstIdx1) of elements in TstLabelsI/R is considered
47475 : TmpI - temporary array, reallocated as needed
47476 :
47477 : Result:
47478 : sum of squared residuals;
47479 : for NClasses>=2 it coincides with Gini impurity times (Idx1-Idx0)
47480 :
47481 : Note: callers typically pass WorkBuf.TmpNRMS2 as TmpI.
47483 :
47484 : -- ALGLIB --
47485 : Copyright 21.05.2018 by Bochkanov Sergey
47486 : *************************************************************************/
47487 0 : static double dforest_meannrms2(ae_int_t nclasses,
47488 : /* Integer */ ae_vector* trnlabelsi,
47489 : /* Real */ ae_vector* trnlabelsr,
47490 : ae_int_t trnidx0,
47491 : ae_int_t trnidx1,
47492 : /* Integer */ ae_vector* tstlabelsi,
47493 : /* Real */ ae_vector* tstlabelsr,
47494 : ae_int_t tstidx0,
47495 : ae_int_t tstidx1,
47496 : /* Integer */ ae_vector* tmpi,
47497 : ae_state *_state)
47498 : {
47499 : ae_int_t i;
47500 : ae_int_t k;
47501 : ae_int_t ntrn;
47502 : ae_int_t ntst;
47503 : double v;
47504 : double vv;
47505 : double invntrn;
47506 : double pitrn;
47507 : double nitst;
47508 : double result;
47509 :
47510 :
47511 0 : ae_assert(trnidx0<=trnidx1, "MeanNRMS2: integrity check failed (8754)", _state);
47512 0 : ae_assert(tstidx0<=tstidx1, "MeanNRMS2: integrity check failed (8754)", _state);
47513 0 : result = (double)(0);
47514 0 : ntrn = trnidx1-trnidx0;
47515 0 : ntst = tstidx1-tstidx0;
47516 0 : if( ntrn==0||ntst==0 )
47517 : {
47518 0 : return result;
47519 : }
47520 0 : invntrn = 1.0/ntrn;
47521 0 : if( nclasses>1 )
47522 : {
47523 :
47524 : /*
47525 : * Classification problem
47526 : */
47527 0 : ivectorsetlengthatleast(tmpi, 2*nclasses, _state);
47528 0 : for(i=0; i<=2*nclasses-1; i++)
47529 : {
47530 0 : tmpi->ptr.p_int[i] = 0;
47531 : }
47532 0 : for(i=trnidx0; i<=trnidx1-1; i++)
47533 : {
47534 0 : k = trnlabelsi->ptr.p_int[i];
47535 0 : tmpi->ptr.p_int[k] = tmpi->ptr.p_int[k]+1;
47536 : }
47537 0 : for(i=tstidx0; i<=tstidx1-1; i++)
47538 : {
47539 0 : k = tstlabelsi->ptr.p_int[i];
47540 0 : tmpi->ptr.p_int[k+nclasses] = tmpi->ptr.p_int[k+nclasses]+1;
47541 : }
47542 0 : for(i=0; i<=nclasses-1; i++)
47543 : {
47544 0 : pitrn = tmpi->ptr.p_int[i]*invntrn;
47545 0 : nitst = (double)(tmpi->ptr.p_int[i+nclasses]);
47546 0 : result = result+nitst*(1-pitrn)*(1-pitrn);
47547 0 : result = result+(ntst-nitst)*pitrn*pitrn;
47548 : }
47549 : }
47550 : else
47551 : {
47552 :
47553 : /*
47554 : * regression-specific code
47555 : */
47556 0 : v = (double)(0);
47557 0 : for(i=trnidx0; i<=trnidx1-1; i++)
47558 : {
47559 0 : v = v+trnlabelsr->ptr.p_double[i];
47560 : }
47561 0 : v = v*invntrn;
47562 0 : for(i=tstidx0; i<=tstidx1-1; i++)
47563 : {
47564 0 : vv = tstlabelsr->ptr.p_double[i]-v;
47565 0 : result = result+vv*vv;
47566 : }
47567 : }
47568 0 : return result;
47569 : }
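/*************************************************************************
NOTE (added): a quick check of the Gini claim above. When the TRN and TST
ranges coincide (N points, class shares p_i, counts n_i=N*p_i), the
classification branch evaluates to

    sum_i [ n_i*(1-p_i)^2 + (N-n_i)*p_i^2 ]
        = N * sum_i [ p_i*(1-p_i)^2 + (1-p_i)*p_i^2 ]
        = N * sum_i p_i*(1-p_i)

which is exactly N times the Gini impurity, as stated in the comment.
*************************************************************************/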
47570 :
47571 :
47572 : /*************************************************************************
47573 : This function is a part of the recursive tree construction function; it
47574 : selects a variable for splitting according to the current tree construction
47575 : algorithm.
47576 :
47577 : Note: it modifies VarsInPool and may decrease it if some variables become
47578 : non-informative and leave the pool.
47579 :
47580 : -- ALGLIB --
47581 : Copyright 21.05.2018 by Bochkanov Sergey
47582 : *************************************************************************/
47583 0 : static void dforest_choosecurrentsplitdense(decisionforestbuilder* s,
47584 : dfworkbuf* workbuf,
47585 : ae_int_t* varsinpool,
47586 : ae_int_t varstoselect,
47587 : hqrndstate* rs,
47588 : ae_int_t idx0,
47589 : ae_int_t idx1,
47590 : ae_int_t* varbest,
47591 : double* splitbest,
47592 : ae_state *_state)
47593 : {
47594 : ae_int_t npoints;
47595 : double errbest;
47596 : ae_int_t varstried;
47597 : ae_int_t varcur;
47598 : ae_bool valuesaresame;
47599 : ae_int_t offs;
47600 : double split;
47601 : ae_int_t i;
47602 : double v;
47603 : double v0;
47604 : double currms;
47605 : ae_int_t info;
47606 :
47607 0 : *varbest = 0;
47608 0 : *splitbest = 0;
47609 :
47610 0 : ae_assert(s->dstype==0, "sparsity is not supported 4terg!", _state);
47611 0 : ae_assert(s->rdfalgo==0, "BuildRandomTreeRec: integrity check failed (1657)", _state);
47612 0 : ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (3445)", _state);
47613 0 : npoints = s->npoints;
47614 :
47615 : /*
47616 : * Select split according to dense direct RDF algorithm
47617 : */
47618 0 : *varbest = -1;
47619 0 : errbest = ae_maxrealnumber;
47620 0 : *splitbest = (double)(0);
47621 0 : varstried = 0;
47622 0 : while(varstried<=ae_minint(varstoselect, *varsinpool, _state)-1)
47623 : {
47624 :
47625 : /*
47626 : * select variables from pool
47627 : */
47628 0 : swapelementsi(&workbuf->varpool, varstried, varstried+hqrnduniformi(rs, *varsinpool-varstried, _state), _state);
47629 0 : varcur = workbuf->varpool.ptr.p_int[varstried];
47630 :
47631 : /*
47632 : * Load variable values into the working array.
47633 : * If all values are the same, the variable is excluded from the pool and variable selection is re-run.
47634 : */
47635 0 : valuesaresame = ae_true;
47636 0 : ae_assert(s->dstype==0, "not supported segsv34fs", _state);
47637 0 : offs = npoints*varcur;
47638 0 : v0 = s->dsdata.ptr.p_double[offs+workbuf->trnset.ptr.p_int[idx0]];
47639 0 : for(i=idx0; i<=idx1-1; i++)
47640 : {
47641 0 : v = s->dsdata.ptr.p_double[offs+workbuf->trnset.ptr.p_int[i]];
47642 0 : workbuf->curvals.ptr.p_double[i] = v;
47643 0 : valuesaresame = valuesaresame&&v==v0;
47644 : }
47645 0 : if( valuesaresame )
47646 : {
47647 :
47648 : /*
47649 : * The variable does not change across the current subset.
47650 : * Exclude the variable from the pool and go to the next iteration.
47651 : * VarsTried is not increased.
47652 : *
47653 : * NOTE: it is essential that the updated VarsInPool is passed
47654 : * down to children but not up to the caller - it is
47655 : * possible that one level higher this variable is
47656 : * not constant.
47657 : */
47658 0 : swapelementsi(&workbuf->varpool, varstried, *varsinpool-1, _state);
47659 0 : *varsinpool = *varsinpool-1;
47660 0 : continue;
47661 : }
47662 :
47663 : /*
47664 : * Now we are ready to infer the split
47665 : */
47666 0 : dforest_evaluatedensesplit(s, workbuf, rs, varcur, idx0, idx1, &info, &split, &currms, _state);
47667 0 : if( info>0&&(*varbest<0||ae_fp_less_eq(currms,errbest)) )
47668 : {
47669 0 : errbest = currms;
47670 0 : *varbest = varcur;
47671 0 : *splitbest = split;
47672 0 : for(i=idx0; i<=idx1-1; i++)
47673 : {
47674 0 : workbuf->bestvals.ptr.p_double[i] = workbuf->curvals.ptr.p_double[i];
47675 : }
47676 : }
47677 :
47678 : /*
47679 : * Next iteration
47680 : */
47681 0 : varstried = varstried+1;
47682 : }
47683 0 : }
47684 :
47685 :
47686 : /*************************************************************************
47687 : This function performs a split on a specific dense variable whose values
47688 : are stored in WorkBuf.CurVals[Idx0,Idx1) and whose labels are stored in
47689 : WorkBuf.TrnLabelsR/I[Idx0,Idx1).
47690 :
47691 : It returns the split value and the associated RMS error. It is the
47692 : responsibility of the caller to make sure that the variable has at least
47693 : two distinct values, i.e. that a split is possible.
47694 :
47695 : Precomputed values of the following fields of WorkBuf are used:
47696 : * ClassPriors
47697 :
47698 : The following fields of WorkBuf are used as temporaries:
47699 : * ClassTotals0,1,01
47700 : * Tmp0I, Tmp1I, Tmp0R, Tmp1R, Tmp2R, Tmp3R
47701 :
47702 : -- ALGLIB --
47703 : Copyright 21.05.2018 by Bochkanov Sergey
47704 : *************************************************************************/
47705 0 : static void dforest_evaluatedensesplit(decisionforestbuilder* s,
47706 : dfworkbuf* workbuf,
47707 : hqrndstate* rs,
47708 : ae_int_t splitvar,
47709 : ae_int_t idx0,
47710 : ae_int_t idx1,
47711 : ae_int_t* info,
47712 : double* split,
47713 : double* rms,
47714 : ae_state *_state)
47715 : {
47716 : ae_int_t nclasses;
47717 : ae_int_t i;
47718 : ae_int_t j;
47719 : ae_int_t k0;
47720 : ae_int_t k1;
47721 : double v;
47722 : double v0;
47723 : double v1;
47724 : double v2;
47725 : ae_int_t sl;
47726 : ae_int_t sr;
47727 :
47728 0 : *info = 0;
47729 0 : *split = 0;
47730 0 : *rms = 0;
47731 :
47732 0 : ae_assert(idx0<idx1, "BuildRandomTreeRec: integrity check failed (8754)", _state);
47733 0 : nclasses = s->nclasses;
47734 0 : if( s->dsbinary.ptr.p_bool[splitvar] )
47735 : {
47736 :
47737 : /*
47738 : * Try a simple binary split, if possible.
47739 : * The split can be inferred from the minimum/maximum values; we just calculate the RMS error.
47740 : */
47741 0 : *info = 1;
47742 0 : *split = dforest_getsplit(s, s->dsmin.ptr.p_double[splitvar], s->dsmax.ptr.p_double[splitvar], rs, _state);
47743 0 : if( nclasses>1 )
47744 : {
47745 :
47746 : /*
47747 : * Classification problem
47748 : */
47749 0 : for(j=0; j<=nclasses-1; j++)
47750 : {
47751 0 : workbuf->classtotals0.ptr.p_int[j] = 0;
47752 : }
47753 0 : sl = 0;
47754 0 : for(i=idx0; i<=idx1-1; i++)
47755 : {
47756 0 : if( workbuf->curvals.ptr.p_double[i]<*split )
47757 : {
47758 0 : j = workbuf->trnlabelsi.ptr.p_int[i];
47759 0 : workbuf->classtotals0.ptr.p_int[j] = workbuf->classtotals0.ptr.p_int[j]+1;
47760 0 : sl = sl+1;
47761 : }
47762 : }
47763 0 : sr = idx1-idx0-sl;
47764 0 : ae_assert(sl!=0&&sr!=0, "BuildRandomTreeRec: something strange, impossible failure!", _state);
47765 0 : v0 = (double)1/(double)sl;
47766 0 : v1 = (double)1/(double)sr;
47767 0 : *rms = (double)(0);
47768 0 : for(j=0; j<=nclasses-1; j++)
47769 : {
47770 0 : k0 = workbuf->classtotals0.ptr.p_int[j];
47771 0 : k1 = workbuf->classpriors.ptr.p_int[j]-k0;
47772 0 : *rms = *rms+k0*(1-v0*k0)+k1*(1-v1*k1);
47773 : }
47774 0 : *rms = ae_sqrt(*rms/(nclasses*(idx1-idx0+1)), _state);
47775 : }
47776 : else
47777 : {
47778 :
47779 : /*
47780 : * regression-specific code
47781 : */
47782 0 : sl = 0;
47783 0 : sr = 0;
47784 0 : v1 = (double)(0);
47785 0 : v2 = (double)(0);
47786 0 : for(j=idx0; j<=idx1-1; j++)
47787 : {
47788 0 : if( workbuf->curvals.ptr.p_double[j]<*split )
47789 : {
47790 0 : v1 = v1+workbuf->trnlabelsr.ptr.p_double[j];
47791 0 : sl = sl+1;
47792 : }
47793 : else
47794 : {
47795 0 : v2 = v2+workbuf->trnlabelsr.ptr.p_double[j];
47796 0 : sr = sr+1;
47797 : }
47798 : }
47799 0 : ae_assert(sl!=0&&sr!=0, "BuildRandomTreeRec: something strange, impossible failure!", _state);
47800 0 : v1 = v1/sl;
47801 0 : v2 = v2/sr;
47802 0 : *rms = (double)(0);
47803 0 : for(j=0; j<=idx1-idx0-1; j++)
47804 : {
47805 0 : v = workbuf->trnlabelsr.ptr.p_double[idx0+j];
47806 0 : if( workbuf->curvals.ptr.p_double[j]<*split )
47807 : {
47808 0 : v = v-v1;
47809 : }
47810 : else
47811 : {
47812 0 : v = v-v2;
47813 : }
47814 0 : *rms = *rms+v*v;
47815 : }
47816 0 : *rms = ae_sqrt(*rms/(idx1-idx0+1), _state);
47817 : }
47818 : }
47819 : else
47820 : {
47821 :
47822 : /*
47823 : * General split
47824 : */
47825 0 : *info = 0;
47826 0 : if( nclasses>1 )
47827 : {
47828 0 : for(i=0; i<=idx1-idx0-1; i++)
47829 : {
47830 0 : workbuf->tmp0r.ptr.p_double[i] = workbuf->curvals.ptr.p_double[idx0+i];
47831 0 : workbuf->tmp0i.ptr.p_int[i] = workbuf->trnlabelsi.ptr.p_int[idx0+i];
47832 : }
47833 0 : dforest_classifiersplit(s, workbuf, &workbuf->tmp0r, &workbuf->tmp0i, idx1-idx0, rs, info, split, rms, &workbuf->tmp1r, &workbuf->tmp1i, _state);
47834 : }
47835 : else
47836 : {
47837 0 : for(i=0; i<=idx1-idx0-1; i++)
47838 : {
47839 0 : workbuf->tmp0r.ptr.p_double[i] = workbuf->curvals.ptr.p_double[idx0+i];
47840 0 : workbuf->tmp1r.ptr.p_double[i] = workbuf->trnlabelsr.ptr.p_double[idx0+i];
47841 : }
47842 0 : dforest_regressionsplit(s, workbuf, &workbuf->tmp0r, &workbuf->tmp1r, idx1-idx0, info, split, rms, &workbuf->tmp2r, &workbuf->tmp3r, _state);
47843 : }
47844 : }
47845 0 : }
47846 :
47847 :
47848 : /*************************************************************************
47849 : Classifier split
47850 : *************************************************************************/
47851 0 : static void dforest_classifiersplit(decisionforestbuilder* s,
47852 : dfworkbuf* workbuf,
47853 : /* Real */ ae_vector* x,
47854 : /* Integer */ ae_vector* c,
47855 : ae_int_t n,
47856 : hqrndstate* rs,
47857 : ae_int_t* info,
47858 : double* threshold,
47859 : double* e,
47860 : /* Real */ ae_vector* sortrbuf,
47861 : /* Integer */ ae_vector* sortibuf,
47862 : ae_state *_state)
47863 : {
47864 : ae_int_t i;
47865 : ae_int_t j;
47866 : ae_int_t k;
47867 : ae_int_t n0;
47868 : ae_int_t n0prev;
47869 : double v;
47870 : ae_int_t advanceby;
47871 : double rms;
47872 : ae_int_t k0;
47873 : ae_int_t k1;
47874 : double v0;
47875 : double v1;
47876 : ae_int_t nclasses;
47877 : double vmin;
47878 : double vmax;
47879 :
47880 0 : *info = 0;
47881 0 : *threshold = 0;
47882 0 : *e = 0;
47883 :
47884 0 : ae_assert((s->rdfsplitstrength==0||s->rdfsplitstrength==1)||s->rdfsplitstrength==2, "RDF: unexpected split type at ClassifierSplit()", _state);
47885 0 : nclasses = s->nclasses;
47886 0 : advanceby = 1;
47887 0 : if( n>=20 )
47888 : {
47889 0 : advanceby = ae_maxint(2, ae_round(n*0.05, _state), _state);
47890 : }
47891 0 : *info = -1;
47892 0 : *threshold = (double)(0);
47893 0 : *e = ae_maxrealnumber;
47894 :
47895 : /*
47896 : * Random split
47897 : */
47898 0 : if( s->rdfsplitstrength==0 )
47899 : {
47900 :
47901 : /*
47902 : * Evaluate minimum, maximum and randomly selected values
47903 : */
47904 0 : vmin = x->ptr.p_double[0];
47905 0 : vmax = x->ptr.p_double[0];
47906 0 : for(i=1; i<=n-1; i++)
47907 : {
47908 0 : v = x->ptr.p_double[i];
47909 0 : if( v<vmin )
47910 : {
47911 0 : vmin = v;
47912 : }
47913 0 : if( v>vmax )
47914 : {
47915 0 : vmax = v;
47916 : }
47917 : }
47918 0 : if( ae_fp_eq(vmin,vmax) )
47919 : {
47920 0 : return;
47921 : }
47922 0 : v = x->ptr.p_double[hqrnduniformi(rs, n, _state)];
47923 0 : if( ae_fp_eq(v,vmin) )
47924 : {
47925 0 : v = vmax;
47926 : }
47927 :
47928 : /*
47929 : * Calculate RMS error associated with the split
47930 : */
47931 0 : for(i=0; i<=nclasses-1; i++)
47932 : {
47933 0 : workbuf->classtotals0.ptr.p_int[i] = 0;
47934 : }
47935 0 : n0 = 0;
47936 0 : for(i=0; i<=n-1; i++)
47937 : {
47938 0 : if( x->ptr.p_double[i]<v )
47939 : {
47940 0 : k = c->ptr.p_int[i];
47941 0 : workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
47942 0 : n0 = n0+1;
47943 : }
47944 : }
47945 0 : ae_assert(n0>0&&n0<n, "RDF: critical integrity check failed at ClassifierSplit()", _state);
47946 0 : v0 = (double)1/(double)n0;
47947 0 : v1 = (double)1/(double)(n-n0);
47948 0 : rms = (double)(0);
47949 0 : for(j=0; j<=nclasses-1; j++)
47950 : {
47951 0 : k0 = workbuf->classtotals0.ptr.p_int[j];
47952 0 : k1 = workbuf->classpriors.ptr.p_int[j]-k0;
47953 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
47954 : }
47955 0 : *threshold = v;
47956 0 : *info = 1;
47957 0 : *e = rms;
47958 0 : return;
47959 : }
47960 :
47961 : /*
47962 : * Stronger splits require us to sort the data.
47963 : * Quick check for degeneracy:
47964 : */
47965 0 : tagsortfasti(x, c, sortrbuf, sortibuf, n, _state);
47966 0 : v = 0.5*(x->ptr.p_double[0]+x->ptr.p_double[n-1]);
47967 0 : if( !(ae_fp_less(x->ptr.p_double[0],v)&&ae_fp_less(v,x->ptr.p_double[n-1])) )
47968 : {
47969 0 : return;
47970 : }
47971 :
47972 : /*
47973 : * Split at the middle
47974 : */
47975 0 : if( s->rdfsplitstrength==1 )
47976 : {
47977 :
47978 : /*
47979 : * Select split position
47980 : */
47981 0 : vmin = x->ptr.p_double[0];
47982 0 : vmax = x->ptr.p_double[n-1];
47983 0 : v = x->ptr.p_double[n/2];
47984 0 : if( ae_fp_eq(v,vmin) )
47985 : {
47986 0 : v = vmin+0.001*(vmax-vmin);
47987 : }
47988 0 : if( ae_fp_eq(v,vmin) )
47989 : {
47990 0 : v = vmax;
47991 : }
47992 :
47993 : /*
47994 : * Calculate RMS error associated with the split
47995 : */
47996 0 : for(i=0; i<=nclasses-1; i++)
47997 : {
47998 0 : workbuf->classtotals0.ptr.p_int[i] = 0;
47999 : }
48000 0 : n0 = 0;
48001 0 : for(i=0; i<=n-1; i++)
48002 : {
48003 0 : if( x->ptr.p_double[i]<v )
48004 : {
48005 0 : k = c->ptr.p_int[i];
48006 0 : workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
48007 0 : n0 = n0+1;
48008 : }
48009 : }
48010 0 : ae_assert(n0>0&&n0<n, "RDF: critical integrity check failed at ClassifierSplit()", _state);
48011 0 : v0 = (double)1/(double)n0;
48012 0 : v1 = (double)1/(double)(n-n0);
48013 0 : rms = (double)(0);
48014 0 : for(j=0; j<=nclasses-1; j++)
48015 : {
48016 0 : k0 = workbuf->classtotals0.ptr.p_int[j];
48017 0 : k1 = workbuf->classpriors.ptr.p_int[j]-k0;
48018 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48019 : }
48020 0 : *threshold = v;
48021 0 : *info = 1;
48022 0 : *e = rms;
48023 0 : return;
48024 : }
48025 :
48026 : /*
48027 : * Strong split
48028 : */
48029 0 : if( s->rdfsplitstrength==2 )
48030 : {
48031 :
48032 : /*
48033 : * Prepare initial split.
48034 : * Evaluate current split, prepare next one, repeat.
48035 : */
48036 0 : for(i=0; i<=nclasses-1; i++)
48037 : {
48038 0 : workbuf->classtotals0.ptr.p_int[i] = 0;
48039 : }
48040 0 : n0 = 1;
48041 0 : while(n0<n&&x->ptr.p_double[n0]==x->ptr.p_double[n0-1])
48042 : {
48043 0 : n0 = n0+1;
48044 : }
48045 0 : ae_assert(n0<n, "RDF: critical integrity check failed in ClassifierSplit()", _state);
48046 0 : for(i=0; i<=n0-1; i++)
48047 : {
48048 0 : k = c->ptr.p_int[i];
48049 0 : workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
48050 : }
48051 0 : *info = -1;
48052 0 : *threshold = x->ptr.p_double[n-1];
48053 0 : *e = ae_maxrealnumber;
48054 0 : while(n0<n)
48055 : {
48056 :
48057 : /*
48058 : * RMS error associated with current split
48059 : */
48060 0 : v0 = (double)1/(double)n0;
48061 0 : v1 = (double)1/(double)(n-n0);
48062 0 : rms = (double)(0);
48063 0 : for(j=0; j<=nclasses-1; j++)
48064 : {
48065 0 : k0 = workbuf->classtotals0.ptr.p_int[j];
48066 0 : k1 = workbuf->classpriors.ptr.p_int[j]-k0;
48067 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48068 : }
48069 0 : if( *info<0||rms<*e )
48070 : {
48071 0 : *info = 1;
48072 0 : *e = rms;
48073 0 : *threshold = 0.5*(x->ptr.p_double[n0-1]+x->ptr.p_double[n0]);
48074 0 : if( *threshold<=x->ptr.p_double[n0-1] )
48075 : {
48076 0 : *threshold = x->ptr.p_double[n0];
48077 : }
48078 : }
48079 :
48080 : /*
48081 : * Advance
48082 : */
48083 0 : n0prev = n0;
48084 0 : while(n0<n&&n0-n0prev<advanceby)
48085 : {
48086 0 : v = x->ptr.p_double[n0];
48087 0 : while(n0<n&&x->ptr.p_double[n0]==v)
48088 : {
48089 0 : k = c->ptr.p_int[n0];
48090 0 : workbuf->classtotals0.ptr.p_int[k] = workbuf->classtotals0.ptr.p_int[k]+1;
48091 0 : n0 = n0+1;
48092 : }
48093 : }
48094 : }
48095 0 : if( *info>0 )
48096 : {
48097 0 : *e = ae_sqrt(*e/(nclasses*n), _state);
48098 : }
48099 0 : return;
48100 : }
48101 0 : ae_assert(ae_false, "RDF: ClassifierSplit(), critical error", _state);
48102 : }
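/*
 * Illustrative sketch, not part of ALGLIB: the score minimized by the splits
 * above. With K0[j] and K1[j] denoting per-class counts on the left (N0
 * samples) and right (N1 samples) sides of a split, the accumulated quantity
 *
 *     sum_j  K0[j]*(1-K0[j]/N0) + K1[j]*(1-K1[j]/N1)
 *
 * equals N0*Gini(left)+N1*Gini(right), i.e. a size-weighted Gini impurity;
 * the final sqrt(E/(NClasses*N)) is a monotone rescaling that does not change
 * which split wins. The helper name below is hypothetical.
 */
static double example_weighted_gini(const ae_int_t* k0,
     const ae_int_t* k1,
     ae_int_t nclasses,
     ae_int_t n0,
     ae_int_t n1)
{
    double v0 = 1.0/(double)n0;  /* assumes n0>0, as asserted above */
    double v1 = 1.0/(double)n1;  /* assumes n1>0 */
    double s = 0.0;
    ae_int_t j;
    for(j=0; j<nclasses; j++)
    {
        s = s+k0[j]*(1-v0*k0[j])+k1[j]*(1-v1*k1[j]);
    }
    return s;
}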
48103 :
48104 :
48105 : /*************************************************************************
48106 : Regression model split
48107 : *************************************************************************/
48108 0 : static void dforest_regressionsplit(decisionforestbuilder* s,
48109 : dfworkbuf* workbuf,
48110 : /* Real */ ae_vector* x,
48111 : /* Real */ ae_vector* y,
48112 : ae_int_t n,
48113 : ae_int_t* info,
48114 : double* threshold,
48115 : double* e,
48116 : /* Real */ ae_vector* sortrbuf,
48117 : /* Real */ ae_vector* sortrbuf2,
48118 : ae_state *_state)
48119 : {
48120 : ae_int_t i;
48121 : double vmin;
48122 : double vmax;
48123 : double bnd01;
48124 : double bnd12;
48125 : double bnd23;
48126 : ae_int_t total0;
48127 : ae_int_t total1;
48128 : ae_int_t total2;
48129 : ae_int_t total3;
48130 : ae_int_t cnt0;
48131 : ae_int_t cnt1;
48132 : ae_int_t cnt2;
48133 : ae_int_t cnt3;
48134 : ae_int_t n0;
48135 : ae_int_t advanceby;
48136 : double v;
48137 : double v0;
48138 : double v1;
48139 : double rms;
48140 : ae_int_t n0prev;
48141 : ae_int_t k0;
48142 : ae_int_t k1;
48143 :
48144 0 : *info = 0;
48145 0 : *threshold = 0;
48146 0 : *e = 0;
48147 :
48148 0 : advanceby = 1;
48149 0 : if( n>=20 )
48150 : {
48151 0 : advanceby = ae_maxint(2, ae_round(n*0.05, _state), _state);
48152 : }
48153 :
48154 : /*
48155 : * Sort data
48156 : * Quick check for degeneracy
48157 : */
48158 0 : tagsortfastr(x, y, sortrbuf, sortrbuf2, n, _state);
48159 0 : v = 0.5*(x->ptr.p_double[0]+x->ptr.p_double[n-1]);
48160 0 : if( !(ae_fp_less(x->ptr.p_double[0],v)&&ae_fp_less(v,x->ptr.p_double[n-1])) )
48161 : {
48162 0 : *info = -1;
48163 0 : *threshold = x->ptr.p_double[n-1];
48164 0 : *e = ae_maxrealnumber;
48165 0 : return;
48166 : }
48167 :
48168 : /*
48169 : * Prepare initial split.
48170 : * Evaluate current split, prepare next one, repeat.
48171 : */
48172 0 : vmin = y->ptr.p_double[0];
48173 0 : vmax = y->ptr.p_double[0];
48174 0 : for(i=1; i<=n-1; i++)
48175 : {
48176 0 : v = y->ptr.p_double[i];
48177 0 : if( v<vmin )
48178 : {
48179 0 : vmin = v;
48180 : }
48181 0 : if( v>vmax )
48182 : {
48183 0 : vmax = v;
48184 : }
48185 : }
48186 0 : bnd12 = 0.5*(vmin+vmax);
48187 0 : bnd01 = 0.5*(vmin+bnd12);
48188 0 : bnd23 = 0.5*(vmax+bnd12);
48189 0 : total0 = 0;
48190 0 : total1 = 0;
48191 0 : total2 = 0;
48192 0 : total3 = 0;
48193 0 : for(i=0; i<=n-1; i++)
48194 : {
48195 0 : v = y->ptr.p_double[i];
48196 0 : if( v<bnd12 )
48197 : {
48198 0 : if( v<bnd01 )
48199 : {
48200 0 : total0 = total0+1;
48201 : }
48202 : else
48203 : {
48204 0 : total1 = total1+1;
48205 : }
48206 : }
48207 : else
48208 : {
48209 0 : if( v<bnd23 )
48210 : {
48211 0 : total2 = total2+1;
48212 : }
48213 : else
48214 : {
48215 0 : total3 = total3+1;
48216 : }
48217 : }
48218 : }
48219 0 : n0 = 1;
48220 0 : while(n0<n&&x->ptr.p_double[n0]==x->ptr.p_double[n0-1])
48221 : {
48222 0 : n0 = n0+1;
48223 : }
48224 0 : ae_assert(n0<n, "RDF: critical integrity check failed in RegressionSplit()", _state);
48225 0 : cnt0 = 0;
48226 0 : cnt1 = 0;
48227 0 : cnt2 = 0;
48228 0 : cnt3 = 0;
48229 0 : for(i=0; i<=n0-1; i++)
48230 : {
48231 0 : v = y->ptr.p_double[i];
48232 0 : if( v<bnd12 )
48233 : {
48234 0 : if( v<bnd01 )
48235 : {
48236 0 : cnt0 = cnt0+1;
48237 : }
48238 : else
48239 : {
48240 0 : cnt1 = cnt1+1;
48241 : }
48242 : }
48243 : else
48244 : {
48245 0 : if( v<bnd23 )
48246 : {
48247 0 : cnt2 = cnt2+1;
48248 : }
48249 : else
48250 : {
48251 0 : cnt3 = cnt3+1;
48252 : }
48253 : }
48254 : }
48255 0 : *info = -1;
48256 0 : *threshold = x->ptr.p_double[n-1];
48257 0 : *e = ae_maxrealnumber;
48258 0 : while(n0<n)
48259 : {
48260 :
48261 : /*
48262 : * RMS error associated with current split
48263 : */
48264 0 : v0 = (double)1/(double)n0;
48265 0 : v1 = (double)1/(double)(n-n0);
48266 0 : rms = (double)(0);
48267 0 : k0 = cnt0;
48268 0 : k1 = total0-cnt0;
48269 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48270 0 : k0 = cnt1;
48271 0 : k1 = total1-cnt1;
48272 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48273 0 : k0 = cnt2;
48274 0 : k1 = total2-cnt2;
48275 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48276 0 : k0 = cnt3;
48277 0 : k1 = total3-cnt3;
48278 0 : rms = rms+k0*(1-v0*k0)+k1*(1-v1*k1);
48279 0 : if( *info<0||rms<*e )
48280 : {
48281 0 : *info = 1;
48282 0 : *e = rms;
48283 0 : *threshold = 0.5*(x->ptr.p_double[n0-1]+x->ptr.p_double[n0]);
48284 0 : if( *threshold<=x->ptr.p_double[n0-1] )
48285 : {
48286 0 : *threshold = x->ptr.p_double[n0];
48287 : }
48288 : }
48289 :
48290 : /*
48291 : * Advance
48292 : */
48293 0 : n0prev = n0;
48294 0 : while(n0<n&&n0-n0prev<advanceby)
48295 : {
48296 0 : v0 = x->ptr.p_double[n0];
48297 0 : while(n0<n&&x->ptr.p_double[n0]==v0)
48298 : {
48299 0 : v = y->ptr.p_double[n0];
48300 0 : if( v<bnd12 )
48301 : {
48302 0 : if( v<bnd01 )
48303 : {
48304 0 : cnt0 = cnt0+1;
48305 : }
48306 : else
48307 : {
48308 0 : cnt1 = cnt1+1;
48309 : }
48310 : }
48311 : else
48312 : {
48313 0 : if( v<bnd23 )
48314 : {
48315 0 : cnt2 = cnt2+1;
48316 : }
48317 : else
48318 : {
48319 0 : cnt3 = cnt3+1;
48320 : }
48321 : }
48322 0 : n0 = n0+1;
48323 : }
48324 : }
48325 : }
48326 0 : if( *info>0 )
48327 : {
48328 0 : *e = ae_sqrt(*e/(4*n), _state);
48329 : }
48330 : }
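/*
 * Illustrative sketch, not part of ALGLIB: the regression split above is
 * scored as if it were a 4-class classification problem. Targets are bucketed
 * by three boundaries Bnd01<Bnd12<Bnd23 derived from the Y-range (Bnd12
 * bisects [vmin,vmax], Bnd01 and Bnd23 bisect the halves), and the same
 * weighted-Gini score is accumulated over the four buckets. A hypothetical
 * helper mirroring the nested comparisons used in the loops above:
 */
static ae_int_t example_ybin(double v, double bnd01, double bnd12, double bnd23)
{
    if( v<bnd12 )
    {
        return v<bnd01 ? 0 : 1;  /* lower half of the Y-range: buckets 0 and 1 */
    }
    return v<bnd23 ? 2 : 3;      /* upper half of the Y-range: buckets 2 and 3 */
}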
48331 :
48332 :
48333 : /*************************************************************************
48334 : Returns a split point for the interval [A,B]: the deterministic midpoint
48335 : 0.5*(A+B), bumped up to B when rounding collapses the midpoint onto A.
48336 : (The RS argument allows for randomly chosen splits but is currently unused.)
48337 :
48338 : It is guaranteed that A<Split<=B.
48338 :
48339 : -- ALGLIB --
48340 : Copyright 21.05.2018 by Bochkanov Sergey
48341 : *************************************************************************/
48342 0 : static double dforest_getsplit(decisionforestbuilder* s,
48343 : double a,
48344 : double b,
48345 : hqrndstate* rs,
48346 : ae_state *_state)
48347 : {
48348 : double result;
48349 :
48350 :
48351 0 : result = 0.5*(a+b);
48352 0 : if( ae_fp_less_eq(result,a) )
48353 : {
48354 0 : result = b;
48355 : }
48356 0 : return result;
48357 : }
48358 :
48359 :
48360 : /*************************************************************************
48361 : Outputs a leaf to the tree.
48362 :
48363 : The following items of the TRN and OOB sets are updated in the voting buffer:
48364 : * items [Idx0,Idx1) of WorkBuf.TrnSet
48365 : * items [OOBIdx0,OOBIdx1) of WorkBuf.OOBSet
48366 :
48367 : -- ALGLIB --
48368 : Copyright 21.05.2018 by Bochkanov Sergey
48369 : *************************************************************************/
48370 0 : static void dforest_outputleaf(decisionforestbuilder* s,
48371 : dfworkbuf* workbuf,
48372 : /* Real */ ae_vector* treebuf,
48373 : dfvotebuf* votebuf,
48374 : ae_int_t idx0,
48375 : ae_int_t idx1,
48376 : ae_int_t oobidx0,
48377 : ae_int_t oobidx1,
48378 : ae_int_t* treesize,
48379 : double leafval,
48380 : ae_state *_state)
48381 : {
48382 : ae_int_t leafvali;
48383 : ae_int_t nclasses;
48384 : ae_int_t i;
48385 : ae_int_t j;
48386 :
48387 :
48388 0 : nclasses = s->nclasses;
48389 0 : if( nclasses==1 )
48390 : {
48391 :
48392 : /*
48393 : * Store split to the tree
48394 : */
48395 0 : treebuf->ptr.p_double[*treesize] = (double)(-1);
48396 0 : treebuf->ptr.p_double[*treesize+1] = leafval;
48397 :
48398 : /*
48399 : * Update training and OOB voting stats
48400 : */
48401 0 : for(i=idx0; i<=idx1-1; i++)
48402 : {
48403 0 : j = workbuf->trnset.ptr.p_int[i];
48404 0 : votebuf->trntotals.ptr.p_double[j] = votebuf->trntotals.ptr.p_double[j]+leafval;
48405 0 : votebuf->trncounts.ptr.p_int[j] = votebuf->trncounts.ptr.p_int[j]+1;
48406 : }
48407 0 : for(i=oobidx0; i<=oobidx1-1; i++)
48408 : {
48409 0 : j = workbuf->oobset.ptr.p_int[i];
48410 0 : votebuf->oobtotals.ptr.p_double[j] = votebuf->oobtotals.ptr.p_double[j]+leafval;
48411 0 : votebuf->oobcounts.ptr.p_int[j] = votebuf->oobcounts.ptr.p_int[j]+1;
48412 : }
48413 : }
48414 : else
48415 : {
48416 :
48417 : /*
48418 : * Store split to the tree
48419 : */
48420 0 : treebuf->ptr.p_double[*treesize] = (double)(-1);
48421 0 : treebuf->ptr.p_double[*treesize+1] = leafval;
48422 :
48423 : /*
48424 : * Update training and OOB voting stats
48425 : */
48426 0 : leafvali = ae_round(leafval, _state);
48427 0 : for(i=idx0; i<=idx1-1; i++)
48428 : {
48429 0 : j = workbuf->trnset.ptr.p_int[i];
48430 0 : votebuf->trntotals.ptr.p_double[j*nclasses+leafvali] = votebuf->trntotals.ptr.p_double[j*nclasses+leafvali]+1;
48431 0 : votebuf->trncounts.ptr.p_int[j] = votebuf->trncounts.ptr.p_int[j]+1;
48432 : }
48433 0 : for(i=oobidx0; i<=oobidx1-1; i++)
48434 : {
48435 0 : j = workbuf->oobset.ptr.p_int[i];
48436 0 : votebuf->oobtotals.ptr.p_double[j*nclasses+leafvali] = votebuf->oobtotals.ptr.p_double[j*nclasses+leafvali]+1;
48437 0 : votebuf->oobcounts.ptr.p_int[j] = votebuf->oobcounts.ptr.p_int[j]+1;
48438 : }
48439 : }
48440 0 : *treesize = *treesize+dforest_leafnodewidth;
48441 0 : }
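/*
 * Illustrative sketch, not part of ALGLIB: in the uncompressed format a leaf
 * occupies dforest_leafnodewidth consecutive entries of the tree buffer, a -1
 * marker followed by the payload (scalar prediction for regression forests,
 * class index stored as a real for classification forests). Assuming a width
 * of 2, as the two writes above imply, a hypothetical reader would be:
 */
static double example_read_leaf(/* Real */ const ae_vector* treebuf,
     ae_int_t offs,
     ae_state *_state)
{
    ae_assert(ae_fp_eq(treebuf->ptr.p_double[offs],(double)(-1)), "example: OFFS does not point at a leaf", _state);
    return treebuf->ptr.p_double[offs+1];
}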
48442 :
48443 :
48444 : /*************************************************************************
48445 : This function performs generic and algorithm-specific preprocessing of the
48446 : dataset
48447 :
48448 : -- ALGLIB --
48449 : Copyright 21.05.2018 by Bochkanov Sergey
48450 : *************************************************************************/
48451 0 : static void dforest_analyzeandpreprocessdataset(decisionforestbuilder* s,
48452 : ae_state *_state)
48453 : {
48454 : ae_frame _frame_block;
48455 : ae_int_t nvars;
48456 : ae_int_t nclasses;
48457 : ae_int_t npoints;
48458 : ae_int_t i;
48459 : ae_int_t j;
48460 : ae_bool isbinary;
48461 : double v;
48462 : double v0;
48463 : double v1;
48464 : hqrndstate rs;
48465 :
48466 0 : ae_frame_make(_state, &_frame_block);
48467 0 : memset(&rs, 0, sizeof(rs));
48468 0 : _hqrndstate_init(&rs, _state, ae_true);
48469 :
48470 0 : ae_assert(s->dstype==0, "no sparsity", _state);
48471 0 : npoints = s->npoints;
48472 0 : nvars = s->nvars;
48473 0 : nclasses = s->nclasses;
48474 :
48475 : /*
48476 : * seed local RNG
48477 : */
48478 0 : if( s->rdfglobalseed>0 )
48479 : {
48480 0 : hqrndseed(s->rdfglobalseed, 3532, &rs, _state);
48481 : }
48482 : else
48483 : {
48484 0 : hqrndseed(ae_randominteger(30000, _state), 3532, &rs, _state);
48485 : }
48486 :
48487 : /*
48488 : * Generic processing
48489 : */
48490 0 : ae_assert(npoints>=1, "BuildRandomForest: integrity check failed", _state);
48491 0 : rvectorsetlengthatleast(&s->dsmin, nvars, _state);
48492 0 : rvectorsetlengthatleast(&s->dsmax, nvars, _state);
48493 0 : bvectorsetlengthatleast(&s->dsbinary, nvars, _state);
48494 0 : for(i=0; i<=nvars-1; i++)
48495 : {
48496 0 : v0 = s->dsdata.ptr.p_double[i*npoints+0];
48497 0 : v1 = s->dsdata.ptr.p_double[i*npoints+0];
48498 0 : for(j=1; j<=npoints-1; j++)
48499 : {
48500 0 : v = s->dsdata.ptr.p_double[i*npoints+j];
48501 0 : if( v<v0 )
48502 : {
48503 0 : v0 = v;
48504 : }
48505 0 : if( v>v1 )
48506 : {
48507 0 : v1 = v;
48508 : }
48509 : }
48510 0 : s->dsmin.ptr.p_double[i] = v0;
48511 0 : s->dsmax.ptr.p_double[i] = v1;
48512 0 : ae_assert(ae_fp_less_eq(v0,v1), "BuildRandomForest: strange integrity check failure", _state);
48513 0 : isbinary = ae_true;
48514 0 : for(j=0; j<=npoints-1; j++)
48515 : {
48516 0 : v = s->dsdata.ptr.p_double[i*npoints+j];
48517 0 : isbinary = isbinary&&(v==v0||v==v1);
48518 : }
48519 0 : s->dsbinary.ptr.p_bool[i] = isbinary;
48520 : }
48521 0 : if( nclasses==1 )
48522 : {
48523 0 : s->dsravg = (double)(0);
48524 0 : for(i=0; i<=npoints-1; i++)
48525 : {
48526 0 : s->dsravg = s->dsravg+s->dsrval.ptr.p_double[i];
48527 : }
48528 0 : s->dsravg = s->dsravg/npoints;
48529 : }
48530 : else
48531 : {
48532 0 : ivectorsetlengthatleast(&s->dsctotals, nclasses, _state);
48533 0 : for(i=0; i<=nclasses-1; i++)
48534 : {
48535 0 : s->dsctotals.ptr.p_int[i] = 0;
48536 : }
48537 0 : for(i=0; i<=npoints-1; i++)
48538 : {
48539 0 : s->dsctotals.ptr.p_int[s->dsival.ptr.p_int[i]] = s->dsctotals.ptr.p_int[s->dsival.ptr.p_int[i]]+1;
48540 : }
48541 : }
48542 0 : ae_frame_leave(_state);
48543 0 : }
48544 :
48545 :
48546 : /*************************************************************************
48547 : This function merges together the trees generated during training and
48548 : outputs them to the decision forest.
48549 :
48550 : INPUT PARAMETERS:
48551 : S - decision forest builder object
48552 :
48553 : OUTPUT PARAMETERS:
48554 : DF - decision forest
48557 :
48558 : -- ALGLIB --
48559 : Copyright 21.05.2018 by Bochkanov Sergey
48560 : *************************************************************************/
48561 0 : static void dforest_mergetrees(decisionforestbuilder* s,
48562 : decisionforest* df,
48563 : ae_state *_state)
48564 : {
48565 : ae_frame _frame_block;
48566 : ae_int_t i;
48567 : ae_int_t cursize;
48568 : ae_int_t offs;
48569 : dftreebuf *tree;
48570 : ae_smart_ptr _tree;
48571 : ae_vector treesizes;
48572 : ae_vector treeoffsets;
48573 :
48574 0 : ae_frame_make(_state, &_frame_block);
48575 0 : memset(&_tree, 0, sizeof(_tree));
48576 0 : memset(&treesizes, 0, sizeof(treesizes));
48577 0 : memset(&treeoffsets, 0, sizeof(treeoffsets));
48578 0 : ae_smart_ptr_init(&_tree, (void**)&tree, _state, ae_true);
48579 0 : ae_vector_init(&treesizes, 0, DT_INT, _state, ae_true);
48580 0 : ae_vector_init(&treeoffsets, 0, DT_INT, _state, ae_true);
48581 :
48582 0 : df->forestformat = dforest_dfuncompressedv0;
48583 0 : df->nvars = s->nvars;
48584 0 : df->nclasses = s->nclasses;
48585 0 : df->bufsize = 0;
48586 0 : df->ntrees = 0;
48587 :
48588 : /*
48589 : * Determine trees count
48590 : */
48591 0 : ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
48592 0 : while(tree!=NULL)
48593 : {
48594 0 : df->ntrees = df->ntrees+1;
48595 0 : ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
48596 : }
48597 0 : ae_assert(df->ntrees>0, "MergeTrees: integrity check failed, zero trees count", _state);
48598 :
48599 : /*
48600 : * Determine individual tree sizes and total buffer size
48601 : */
48602 0 : ae_vector_set_length(&treesizes, df->ntrees, _state);
48603 0 : for(i=0; i<=df->ntrees-1; i++)
48604 : {
48605 0 : treesizes.ptr.p_int[i] = -1;
48606 : }
48607 0 : ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
48608 0 : while(tree!=NULL)
48609 : {
48610 0 : ae_assert(tree->treeidx>=0&&tree->treeidx<df->ntrees, "MergeTrees: integrity check failed (wrong TreeIdx)", _state);
48611 0 : ae_assert(treesizes.ptr.p_int[tree->treeidx]<0, "MergeTrees: integrity check failed (duplicate TreeIdx)", _state);
48612 0 : df->bufsize = df->bufsize+ae_round(tree->treebuf.ptr.p_double[0], _state);
48613 0 : treesizes.ptr.p_int[tree->treeidx] = ae_round(tree->treebuf.ptr.p_double[0], _state);
48614 0 : ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
48615 : }
48616 0 : for(i=0; i<=df->ntrees-1; i++)
48617 : {
48618 0 : ae_assert(treesizes.ptr.p_int[i]>0, "MergeTrees: integrity check failed (wrong TreeSize)", _state);
48619 : }
48620 :
48621 : /*
48622 : * Determine offsets for individual trees in output buffer
48623 : */
48624 0 : ae_vector_set_length(&treeoffsets, df->ntrees, _state);
48625 0 : treeoffsets.ptr.p_int[0] = 0;
48626 0 : for(i=1; i<=df->ntrees-1; i++)
48627 : {
48628 0 : treeoffsets.ptr.p_int[i] = treeoffsets.ptr.p_int[i-1]+treesizes.ptr.p_int[i-1];
48629 : }
48630 :
48631 : /*
48632 : * Output trees
48633 : *
48634 : * NOTE: since ALGLIB 3.16.0 trees are sorted by tree index prior to
48635 : * output (necessary for variable importance estimation), that's
48636 : * why we need array of tree offsets
48637 : */
48638 0 : ae_vector_set_length(&df->trees, df->bufsize, _state);
48639 0 : ae_shared_pool_first_recycled(&s->treepool, &_tree, _state);
48640 0 : while(tree!=NULL)
48641 : {
48642 0 : cursize = ae_round(tree->treebuf.ptr.p_double[0], _state);
48643 0 : offs = treeoffsets.ptr.p_int[tree->treeidx];
48644 0 : for(i=0; i<=cursize-1; i++)
48645 : {
48646 0 : df->trees.ptr.p_double[offs+i] = tree->treebuf.ptr.p_double[i];
48647 : }
48648 0 : ae_shared_pool_next_recycled(&s->treepool, &_tree, _state);
48649 : }
48650 0 : ae_frame_leave(_state);
48651 0 : }
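/*
 * Illustrative sketch, not part of ALGLIB: the per-tree offsets computed
 * above form an exclusive prefix sum over tree sizes, which is what allows
 * trees to be visited in arbitrary pool order while still landing at their
 * index-sorted positions in the output buffer. Hypothetical helper:
 */
static void example_exclusive_prefix_sum(const ae_int_t* sizes,
     ae_int_t* offsets,
     ae_int_t n)
{
    ae_int_t i;
    offsets[0] = 0;                            /* first tree starts at offset 0; assumes n>=1 */
    for(i=1; i<n; i++)
    {
        offsets[i] = offsets[i-1]+sizes[i-1];  /* each tree follows its predecessor */
    }
}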
48652 :
48653 :
48654 : /*************************************************************************
48655 : This function post-processes voting array and calculates TRN and OOB errors.
48656 :
48657 : INPUT PARAMETERS:
48658 : S - decision forest builder object
48659 : NTrees - number of trees in the forest
48660 : Buf - possibly preallocated vote buffer; its contents are
48661 : overwritten by this function
48662 :
48663 : OUTPUT PARAMETERS:
48664 : Rep - report fields corresponding to errors are updated
48665 :
48666 : -- ALGLIB --
48667 : Copyright 21.05.2018 by Bochkanov Sergey
48668 : *************************************************************************/
48669 0 : static void dforest_processvotingresults(decisionforestbuilder* s,
48670 : ae_int_t ntrees,
48671 : dfvotebuf* buf,
48672 : dfreport* rep,
48673 : ae_state *_state)
48674 : {
48675 : ae_frame _frame_block;
48676 : dfvotebuf *vote;
48677 : ae_smart_ptr _vote;
48678 : ae_int_t nvars;
48679 : ae_int_t nclasses;
48680 : ae_int_t npoints;
48681 : ae_int_t i;
48682 : ae_int_t j;
48683 : ae_int_t k;
48684 : ae_int_t k1;
48685 : double v;
48686 : ae_int_t avgrelcnt;
48687 : ae_int_t oobavgrelcnt;
48688 :
48689 0 : ae_frame_make(_state, &_frame_block);
48690 0 : memset(&_vote, 0, sizeof(_vote));
48691 0 : ae_smart_ptr_init(&_vote, (void**)&vote, _state, ae_true);
48692 :
48693 0 : npoints = s->npoints;
48694 0 : nvars = s->nvars;
48695 0 : nclasses = s->nclasses;
48696 0 : ae_assert(npoints>0, "DFOREST: integrity check failed", _state);
48697 0 : ae_assert(nvars>0, "DFOREST: integrity check failed", _state);
48698 0 : ae_assert(nclasses>0, "DFOREST: integrity check failed", _state);
48699 :
48700 : /*
48701 : * Prepare vote buffer
48702 : */
48703 0 : rvectorsetlengthatleast(&buf->trntotals, npoints*nclasses, _state);
48704 0 : rvectorsetlengthatleast(&buf->oobtotals, npoints*nclasses, _state);
48705 0 : for(i=0; i<=npoints*nclasses-1; i++)
48706 : {
48707 0 : buf->trntotals.ptr.p_double[i] = (double)(0);
48708 0 : buf->oobtotals.ptr.p_double[i] = (double)(0);
48709 : }
48710 0 : ivectorsetlengthatleast(&buf->trncounts, npoints, _state);
48711 0 : ivectorsetlengthatleast(&buf->oobcounts, npoints, _state);
48712 0 : for(i=0; i<=npoints-1; i++)
48713 : {
48714 0 : buf->trncounts.ptr.p_int[i] = 0;
48715 0 : buf->oobcounts.ptr.p_int[i] = 0;
48716 : }
48717 :
48718 : /*
48719 : * Merge voting arrays
48720 : */
48721 0 : ae_shared_pool_first_recycled(&s->votepool, &_vote, _state);
48722 0 : while(vote!=NULL)
48723 : {
48724 0 : for(i=0; i<=npoints*nclasses-1; i++)
48725 : {
48726 0 : buf->trntotals.ptr.p_double[i] = buf->trntotals.ptr.p_double[i]+vote->trntotals.ptr.p_double[i]+vote->oobtotals.ptr.p_double[i];
48727 0 : buf->oobtotals.ptr.p_double[i] = buf->oobtotals.ptr.p_double[i]+vote->oobtotals.ptr.p_double[i];
48728 : }
48729 0 : for(i=0; i<=npoints-1; i++)
48730 : {
48731 0 : buf->trncounts.ptr.p_int[i] = buf->trncounts.ptr.p_int[i]+vote->trncounts.ptr.p_int[i]+vote->oobcounts.ptr.p_int[i];
48732 0 : buf->oobcounts.ptr.p_int[i] = buf->oobcounts.ptr.p_int[i]+vote->oobcounts.ptr.p_int[i];
48733 : }
48734 0 : ae_shared_pool_next_recycled(&s->votepool, &_vote, _state);
48735 : }
48736 0 : for(i=0; i<=npoints-1; i++)
48737 : {
48738 0 : v = 1/coalesce((double)(buf->trncounts.ptr.p_int[i]), (double)(1), _state);
48739 0 : for(j=0; j<=nclasses-1; j++)
48740 : {
48741 0 : buf->trntotals.ptr.p_double[i*nclasses+j] = buf->trntotals.ptr.p_double[i*nclasses+j]*v;
48742 : }
48743 0 : v = 1/coalesce((double)(buf->oobcounts.ptr.p_int[i]), (double)(1), _state);
48744 0 : for(j=0; j<=nclasses-1; j++)
48745 : {
48746 0 : buf->oobtotals.ptr.p_double[i*nclasses+j] = buf->oobtotals.ptr.p_double[i*nclasses+j]*v;
48747 : }
48748 : }
48749 :
48750 : /*
48751 : * Use aggregated voting data to output error metrics
48752 : */
48753 0 : avgrelcnt = 0;
48754 0 : oobavgrelcnt = 0;
48755 0 : rep->rmserror = (double)(0);
48756 0 : rep->avgerror = (double)(0);
48757 0 : rep->avgrelerror = (double)(0);
48758 0 : rep->relclserror = (double)(0);
48759 0 : rep->avgce = (double)(0);
48760 0 : rep->oobrmserror = (double)(0);
48761 0 : rep->oobavgerror = (double)(0);
48762 0 : rep->oobavgrelerror = (double)(0);
48763 0 : rep->oobrelclserror = (double)(0);
48764 0 : rep->oobavgce = (double)(0);
48765 0 : for(i=0; i<=npoints-1; i++)
48766 : {
48767 0 : if( nclasses>1 )
48768 : {
48769 :
48770 : /*
48771 : * classification-specific code
48772 : */
48773 0 : k = s->dsival.ptr.p_int[i];
48774 0 : for(j=0; j<=nclasses-1; j++)
48775 : {
48776 0 : v = buf->trntotals.ptr.p_double[i*nclasses+j];
48777 0 : if( j==k )
48778 : {
48779 0 : rep->avgce = rep->avgce-ae_log(coalesce(v, ae_minrealnumber, _state), _state);
48780 0 : rep->rmserror = rep->rmserror+ae_sqr(v-1, _state);
48781 0 : rep->avgerror = rep->avgerror+ae_fabs(v-1, _state);
48782 0 : rep->avgrelerror = rep->avgrelerror+ae_fabs(v-1, _state);
48783 0 : inc(&avgrelcnt, _state);
48784 : }
48785 : else
48786 : {
48787 0 : rep->rmserror = rep->rmserror+ae_sqr(v, _state);
48788 0 : rep->avgerror = rep->avgerror+ae_fabs(v, _state);
48789 : }
48790 0 : v = buf->oobtotals.ptr.p_double[i*nclasses+j];
48791 0 : if( j==k )
48792 : {
48793 0 : rep->oobavgce = rep->oobavgce-ae_log(coalesce(v, ae_minrealnumber, _state), _state);
48794 0 : rep->oobrmserror = rep->oobrmserror+ae_sqr(v-1, _state);
48795 0 : rep->oobavgerror = rep->oobavgerror+ae_fabs(v-1, _state);
48796 0 : rep->oobavgrelerror = rep->oobavgrelerror+ae_fabs(v-1, _state);
48797 0 : inc(&oobavgrelcnt, _state);
48798 : }
48799 : else
48800 : {
48801 0 : rep->oobrmserror = rep->oobrmserror+ae_sqr(v, _state);
48802 0 : rep->oobavgerror = rep->oobavgerror+ae_fabs(v, _state);
48803 : }
48804 : }
48805 :
48806 : /*
48807 : * Classification errors are handled separately
48808 : */
48809 0 : k1 = 0;
48810 0 : for(j=1; j<=nclasses-1; j++)
48811 : {
48812 0 : if( buf->trntotals.ptr.p_double[i*nclasses+j]>buf->trntotals.ptr.p_double[i*nclasses+k1] )
48813 : {
48814 0 : k1 = j;
48815 : }
48816 : }
48817 0 : if( k1!=k )
48818 : {
48819 0 : rep->relclserror = rep->relclserror+1;
48820 : }
48821 0 : k1 = 0;
48822 0 : for(j=1; j<=nclasses-1; j++)
48823 : {
48824 0 : if( buf->oobtotals.ptr.p_double[i*nclasses+j]>buf->oobtotals.ptr.p_double[i*nclasses+k1] )
48825 : {
48826 0 : k1 = j;
48827 : }
48828 : }
48829 0 : if( k1!=k )
48830 : {
48831 0 : rep->oobrelclserror = rep->oobrelclserror+1;
48832 : }
48833 : }
48834 : else
48835 : {
48836 :
48837 : /*
48838 : * regression-specific code
48839 : */
48840 0 : v = buf->trntotals.ptr.p_double[i]-s->dsrval.ptr.p_double[i];
48841 0 : rep->rmserror = rep->rmserror+ae_sqr(v, _state);
48842 0 : rep->avgerror = rep->avgerror+ae_fabs(v, _state);
48843 0 : if( ae_fp_neq(s->dsrval.ptr.p_double[i],(double)(0)) )
48844 : {
48845 0 : rep->avgrelerror = rep->avgrelerror+ae_fabs(v/s->dsrval.ptr.p_double[i], _state);
48846 0 : avgrelcnt = avgrelcnt+1;
48847 : }
48848 0 : v = buf->oobtotals.ptr.p_double[i]-s->dsrval.ptr.p_double[i];
48849 0 : rep->oobrmserror = rep->oobrmserror+ae_sqr(v, _state);
48850 0 : rep->oobavgerror = rep->oobavgerror+ae_fabs(v, _state);
48851 0 : if( ae_fp_neq(s->dsrval.ptr.p_double[i],(double)(0)) )
48852 : {
48853 0 : rep->oobavgrelerror = rep->oobavgrelerror+ae_fabs(v/s->dsrval.ptr.p_double[i], _state);
48854 0 : oobavgrelcnt = oobavgrelcnt+1;
48855 : }
48856 : }
48857 : }
48858 0 : rep->relclserror = rep->relclserror/npoints;
48859 0 : rep->rmserror = ae_sqrt(rep->rmserror/(npoints*nclasses), _state);
48860 0 : rep->avgerror = rep->avgerror/(npoints*nclasses);
48861 0 : rep->avgrelerror = rep->avgrelerror/coalesce((double)(avgrelcnt), (double)(1), _state);
48862 0 : rep->oobrelclserror = rep->oobrelclserror/npoints;
48863 0 : rep->oobrmserror = ae_sqrt(rep->oobrmserror/(npoints*nclasses), _state);
48864 0 : rep->oobavgerror = rep->oobavgerror/(npoints*nclasses);
48865 0 : rep->oobavgrelerror = rep->oobavgrelerror/coalesce((double)(oobavgrelcnt), (double)(1), _state);
48866 0 : ae_frame_leave(_state);
48867 0 : }
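/*
 * Illustrative sketch, not part of ALGLIB: after normalization the vote
 * buffers hold per-class vote fractions P[j] for every point. For a point
 * with true class K the loop above adds sum_j (P[j]-[j==K])^2 to the RMS
 * accumulator, i.e. the squared distance between the vote distribution and
 * the one-hot target. A hypothetical per-point helper:
 */
static double example_sqerr_vs_onehot(const double* p, ae_int_t nclasses, ae_int_t k)
{
    double s = 0.0;
    double t;
    ae_int_t j;
    for(j=0; j<nclasses; j++)
    {
        t = (j==k) ? 1.0 : 0.0;   /* one-hot target for true class k */
        s = s+(p[j]-t)*(p[j]-t);
    }
    return s;
}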
48868 :
48869 :
48870 : /*************************************************************************
48871 : This function performs binary compression of the decision forest, using
48872 : either an 8-bit mantissa (a more compact representation) or a 16-bit
48873 : mantissa for splits and regression outputs.
48874 :
48875 : The forest is compressed in-place.
48876 :
48877 : The return value is the achieved compression factor (uncompressed size in
48878 : bytes divided by compressed size).
48878 :
48879 : -- ALGLIB --
48880 : Copyright 22.07.2019 by Bochkanov Sergey
48881 : *************************************************************************/
48882 0 : static double dforest_binarycompression(decisionforest* df,
48883 : ae_bool usemantissa8,
48884 : ae_state *_state)
48885 : {
48886 : ae_frame _frame_block;
48887 : ae_int_t size8;
48888 : ae_int_t size8i;
48889 : ae_int_t offssrc;
48890 : ae_int_t offsdst;
48891 : ae_int_t i;
48892 : ae_vector dummyi;
48893 : ae_int_t maxrawtreesize;
48894 : ae_vector compressedsizes;
48895 : double result;
48896 :
48897 0 : ae_frame_make(_state, &_frame_block);
48898 0 : memset(&dummyi, 0, sizeof(dummyi));
48899 0 : memset(&compressedsizes, 0, sizeof(compressedsizes));
48900 0 : ae_vector_init(&dummyi, 0, DT_INT, _state, ae_true);
48901 0 : ae_vector_init(&compressedsizes, 0, DT_INT, _state, ae_true);
48902 :
48903 :
48904 : /*
48905 : * Quick exit if already compressed
48906 : */
48907 0 : if( df->forestformat==dforest_dfcompressedv0 )
48908 : {
48909 0 : result = (double)(1);
48910 0 : ae_frame_leave(_state);
48911 0 : return result;
48912 : }
48913 :
48914 : /*
48915 : * Check that source format is supported
48916 : */
48917 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "BinaryCompression: unexpected forest format", _state);
48918 :
48919 : /*
48920 : * Compute sizes of uncompressed and compressed trees.
48921 : */
48922 0 : size8 = 0;
48923 0 : offssrc = 0;
48924 0 : maxrawtreesize = 0;
48925 0 : for(i=0; i<=df->ntrees-1; i++)
48926 : {
48927 0 : size8i = dforest_computecompressedsizerec(df, usemantissa8, offssrc, offssrc+1, &dummyi, ae_false, _state);
48928 0 : size8 = size8+dforest_computecompresseduintsize(size8i, _state)+size8i;
48929 0 : maxrawtreesize = ae_maxint(maxrawtreesize, ae_round(df->trees.ptr.p_double[offssrc], _state), _state);
48930 0 : offssrc = offssrc+ae_round(df->trees.ptr.p_double[offssrc], _state);
48931 : }
48932 0 : result = (double)(8*df->trees.cnt)/(double)(size8+1);
48933 :
48934 : /*
48935 : * Allocate memory and perform compression
48936 : */
48937 0 : ae_vector_set_length(&(df->trees8), size8, _state);
48938 0 : ae_vector_set_length(&compressedsizes, maxrawtreesize, _state);
48939 0 : offssrc = 0;
48940 0 : offsdst = 0;
48941 0 : for(i=0; i<=df->ntrees-1; i++)
48942 : {
48943 :
48944 : /*
48945 : * Call compressed size evaluator one more time, now saving subtree sizes into temporary array
48946 : */
48947 0 : size8i = dforest_computecompressedsizerec(df, usemantissa8, offssrc, offssrc+1, &compressedsizes, ae_true, _state);
48948 :
48949 : /*
48950 : * Output tree header (length in bytes)
48951 : */
48952 0 : dforest_streamuint(&df->trees8, &offsdst, size8i, _state);
48953 :
48954 : /*
48955 : * Compress recursively
48956 : */
48957 0 : dforest_compressrec(df, usemantissa8, offssrc, offssrc+1, &compressedsizes, &df->trees8, &offsdst, _state);
48958 :
48959 : /*
48960 : * Next tree
48961 : */
48962 0 : offssrc = offssrc+ae_round(df->trees.ptr.p_double[offssrc], _state);
48963 : }
48964 0 : ae_assert(offsdst==size8, "BinaryCompression: integrity check failed (stream length)", _state);
48965 :
48966 : /*
48967 : * Finalize forest conversion, clear previously allocated memory
48968 : */
48969 0 : df->forestformat = dforest_dfcompressedv0;
48970 0 : df->usemantissa8 = usemantissa8;
48971 0 : ae_vector_set_length(&df->trees, 0, _state);
48972 0 : ae_frame_leave(_state);
48973 0 : return result;
48974 : }
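/*
 * Illustrative sketch, not part of ALGLIB: the factor returned above compares
 * the uncompressed representation (8 bytes per double slot of DF.Trees)
 * against the length of the compressed byte stream; the +1 in the denominator
 * merely guards against division by zero. E.g., 100000 slots squeezed into
 * 200000 bytes give a factor of 800000/200001, roughly 4.
 */
static double example_compression_factor(ae_int_t slots, ae_int_t bytes)
{
    return (double)(8*slots)/(double)(bytes+1);
}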
48975 :
48976 :
48977 : /*************************************************************************
48978 : This function returns the exact number of bytes required to store the
48979 : compressed version of the tree starting at location TreeRoot.
48980 :
48981 : PARAMETERS:
48982 : DF - decision forest
48983 : UseMantissa8 - whether 8-bit or 16-bit mantissas are used to store
48984 : floating point numbers
48985 : TreeRoot - root of the specific tree being stored (offset in DF.Trees)
48986 : TreePos - position within tree (first location in the tree
48987 : is TreeRoot+1)
48988 : CompressedSizes - not referenced if SaveCompressedSizes is False;
48989 : otherwise, values computed by this function for
48990 : specific values of TreePos are stored to
48991 : CompressedSizes[TreePos-TreeRoot] (other elements
48992 : of the array are not referenced).
48993 : This array must be preallocated by caller.
48994 :
48995 : -- ALGLIB --
48996 : Copyright 22.07.2019 by Bochkanov Sergey
48997 : *************************************************************************/
48998 0 : static ae_int_t dforest_computecompressedsizerec(decisionforest* df,
48999 : ae_bool usemantissa8,
49000 : ae_int_t treeroot,
49001 : ae_int_t treepos,
49002 : /* Integer */ ae_vector* compressedsizes,
49003 : ae_bool savecompressedsizes,
49004 : ae_state *_state)
49005 : {
49006 : ae_int_t jmponbranch;
49007 : ae_int_t child0size;
49008 : ae_int_t child1size;
49009 : ae_int_t fpwidth;
49010 : ae_int_t result;
49011 :
49012 :
49013 0 : if( usemantissa8 )
49014 : {
49015 0 : fpwidth = 2;
49016 : }
49017 : else
49018 : {
49019 0 : fpwidth = 3;
49020 : }
49021 :
49022 : /*
49023 : * Leaf or split?
49024 : */
49025 0 : if( ae_fp_eq(df->trees.ptr.p_double[treepos],(double)(-1)) )
49026 : {
49027 :
49028 : /*
49029 : * Leaf
49030 : */
49031 0 : result = dforest_computecompresseduintsize(2*df->nvars, _state);
49032 0 : if( df->nclasses==1 )
49033 : {
49034 0 : result = result+fpwidth;
49035 : }
49036 : else
49037 : {
49038 0 : result = result+dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos+1], _state), _state);
49039 : }
49040 : }
49041 : else
49042 : {
49043 :
49044 : /*
49045 : * Split
49046 : */
49047 0 : jmponbranch = ae_round(df->trees.ptr.p_double[treepos+2], _state);
49048 0 : child0size = dforest_computecompressedsizerec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, savecompressedsizes, _state);
49049 0 : child1size = dforest_computecompressedsizerec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, savecompressedsizes, _state);
49050 0 : if( child0size<=child1size )
49051 : {
49052 :
49053 : /*
49054 : * Child #0 comes first because it is shorter
49055 : */
49056 0 : result = dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos], _state), _state);
49057 0 : result = result+fpwidth;
49058 0 : result = result+dforest_computecompresseduintsize(child0size, _state);
49059 : }
49060 : else
49061 : {
49062 :
49063 : /*
49064 : * Child #1 comes first because it is shorter
49065 : */
49066 0 : result = dforest_computecompresseduintsize(ae_round(df->trees.ptr.p_double[treepos], _state)+df->nvars, _state);
49067 0 : result = result+fpwidth;
49068 0 : result = result+dforest_computecompresseduintsize(child1size, _state);
49069 : }
49070 0 : result = result+child0size+child1size;
49071 : }
49072 :
49073 : /*
49074 : * Do we have to save compressed sizes?
49075 : */
49076 0 : if( savecompressedsizes )
49077 : {
49078 0 : ae_assert(treepos-treeroot<compressedsizes->cnt, "ComputeCompressedSizeRec: integrity check failed", _state);
49079 0 : compressedsizes->ptr.p_int[treepos-treeroot] = result;
49080 : }
49081 0 : return result;
49082 : }
49083 :
49084 :
49085 : /*************************************************************************
49086 : This function recursively compresses the tree starting at location TreeRoot,
49087 : writing the compressed stream to Buf at offset DstOffs and advancing DstOffs
49088 : by the number of bytes emitted.
49089 :
49090 : PARAMETERS:
49091 : DF - decision forest
49092 : UseMantissa8 - whether 8-bit or 16-bit mantissas are used to store
49093 : floating point numbers
49094 : TreeRoot - root of the specific tree being stored (offset in DF.Trees)
49095 : TreePos - position within tree (first location in the tree
49096 : is TreeRoot+1)
49097 : CompressedSizes - per-node sizes precomputed by ComputeCompressedSizeRec()
49098 : with SaveCompressedSizes=True, indexed by TreePos-TreeRoot
49099 : Buf - output byte buffer
49100 : DstOffs - write offset in Buf, advanced by the number of bytes
49101 : emitted
49102 :
49103 : -- ALGLIB --
49104 : Copyright 22.07.2019 by Bochkanov Sergey
49105 : *************************************************************************/
49106 0 : static void dforest_compressrec(decisionforest* df,
49107 : ae_bool usemantissa8,
49108 : ae_int_t treeroot,
49109 : ae_int_t treepos,
49110 : /* Integer */ ae_vector* compressedsizes,
49111 : ae_vector* buf,
49112 : ae_int_t* dstoffs,
49113 : ae_state *_state)
49114 : {
49115 : ae_int_t jmponbranch;
49116 : ae_int_t child0size;
49117 : ae_int_t child1size;
49118 : ae_int_t varidx;
49119 : double leafval;
49120 : double splitval;
49121 : ae_int_t dstoffsold;
49122 :
49123 :
49124 0 : dstoffsold = *dstoffs;
49125 :
49126 : /*
49127 : * Leaf or split?
49128 : */
49129 0 : varidx = ae_round(df->trees.ptr.p_double[treepos], _state);
49130 0 : if( varidx==-1 )
49131 : {
49132 :
49133 : /*
49134 : * Leaf node:
49135 : * * stream special value which denotes leaf (2*NVars)
49136 : * * then, stream scalar value (floating point) or class number (unsigned integer)
49137 : */
49138 0 : leafval = df->trees.ptr.p_double[treepos+1];
49139 0 : dforest_streamuint(buf, dstoffs, 2*df->nvars, _state);
49140 0 : if( df->nclasses==1 )
49141 : {
49142 0 : dforest_streamfloat(buf, usemantissa8, dstoffs, leafval, _state);
49143 : }
49144 : else
49145 : {
49146 0 : dforest_streamuint(buf, dstoffs, ae_round(leafval, _state), _state);
49147 : }
49148 : }
49149 : else
49150 : {
49151 :
49152 : /*
49153 : * Split node:
49154 : * * fetch compressed sizes of child nodes, decide which child goes first
49155 : */
49156 0 : jmponbranch = ae_round(df->trees.ptr.p_double[treepos+2], _state);
49157 0 : splitval = df->trees.ptr.p_double[treepos+1];
49158 0 : child0size = compressedsizes->ptr.p_int[treepos+dforest_innernodewidth-treeroot];
49159 0 : child1size = compressedsizes->ptr.p_int[treeroot+jmponbranch-treeroot];
49160 0 : if( child0size<=child1size )
49161 : {
49162 :
49163 : /*
49164 : * Child #0 comes first because it is shorter:
49165 : * * stream variable index used for splitting;
49166 : * value in [0,NVars) range indicates that split is
49167 : * "if VAR<VAL then BRANCH0 else BRANCH1"
49168 : * * stream value used for splitting
49169 : * * stream children #0 and #1
49170 : */
49171 0 : dforest_streamuint(buf, dstoffs, varidx, _state);
49172 0 : dforest_streamfloat(buf, usemantissa8, dstoffs, splitval, _state);
49173 0 : dforest_streamuint(buf, dstoffs, child0size, _state);
49174 0 : dforest_compressrec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, buf, dstoffs, _state);
49175 0 : dforest_compressrec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, buf, dstoffs, _state);
49176 : }
49177 : else
49178 : {
49179 :
49180 : /*
49181 : * Child #1 comes first because it is shorter:
49182 : * * stream variable index used for splitting + NVars;
49183 : * value in [NVars,2*NVars) range indicates that split is
49184 : * "if VAR>=VAL then BRANCH0 else BRANCH1"
49185 : * * stream value used for splitting
49186 : * * stream children #0 and #1
49187 : */
49188 0 : dforest_streamuint(buf, dstoffs, varidx+df->nvars, _state);
49189 0 : dforest_streamfloat(buf, usemantissa8, dstoffs, splitval, _state);
49190 0 : dforest_streamuint(buf, dstoffs, child1size, _state);
49191 0 : dforest_compressrec(df, usemantissa8, treeroot, treeroot+jmponbranch, compressedsizes, buf, dstoffs, _state);
49192 0 : dforest_compressrec(df, usemantissa8, treeroot, treepos+dforest_innernodewidth, compressedsizes, buf, dstoffs, _state);
49193 : }
49194 : }
49195 :
49196 : /*
49197 : * Integrity check at the end
49198 : */
49199 0 : ae_assert(*dstoffs-dstoffsold==compressedsizes->ptr.p_int[treepos-treeroot], "CompressRec: integrity check failed (compressed size at leaf)", _state);
49200 0 : }
49201 :
49202 :
49203 : /*************************************************************************
49204 : This function returns exact number of bytes required to store compressed
49205 : unsigned integer number (negative arguments result in assertion being
49206 : generated).
49207 :
49208 : -- ALGLIB --
49209 : Copyright 22.07.2019 by Bochkanov Sergey
49210 : *************************************************************************/
49211 0 : static ae_int_t dforest_computecompresseduintsize(ae_int_t v,
49212 : ae_state *_state)
49213 : {
49214 : ae_int_t result;
49215 :
49216 :
49217 0 : ae_assert(v>=0, "Assertion failed", _state);
49218 0 : result = 1;
49219 0 : while(v>=128)
49220 : {
49221 0 : v = v/128;
49222 0 : result = result+1;
49223 : }
49224 0 : return result;
49225 : }
49226 :
49227 :
49228 : /*************************************************************************
49229 : This function stores a compressed unsigned integer number (a negative
49230 : argument triggers an assertion) to the byte array at location Offs and
49231 : advances Offs by the number of bytes written.
49232 :
49233 : -- ALGLIB --
49234 : Copyright 22.07.2019 by Bochkanov Sergey
49235 : *************************************************************************/
49236 0 : static void dforest_streamuint(ae_vector* buf,
49237 : ae_int_t* offs,
49238 : ae_int_t v,
49239 : ae_state *_state)
49240 : {
49241 : ae_int_t v0;
49242 :
49243 :
49244 0 : ae_assert(v>=0, "Assertion failed", _state);
49245 : for(;;)
49246 : {
49247 :
49248 : /*
49249 : * Save 7 least significant bits of V, use the 8th bit as a flag which
49250 : * tells us whether subsequent 7-bit packets follow.
49251 : */
49252 0 : v0 = v%128;
49253 0 : if( v>=128 )
49254 : {
49255 0 : v0 = v0+128;
49256 : }
49257 0 : buf->ptr.p_ubyte[*(offs)] = (unsigned char)(v0);
49258 0 : *offs = *offs+1;
49259 0 : v = v/128;
49260 0 : if( v==0 )
49261 : {
49262 0 : break;
49263 : }
49264 : }
49265 0 : }
49266 :
49267 :
49268 : /*************************************************************************
49269 : This function reads a compressed unsigned integer number from the byte array
49270 : starting at location Offs and advances Offs by the number of bytes read.
49272 :
49273 : -- ALGLIB --
49274 : Copyright 22.07.2019 by Bochkanov Sergey
49275 : *************************************************************************/
49276 0 : static ae_int_t dforest_unstreamuint(ae_vector* buf,
49277 : ae_int_t* offs,
49278 : ae_state *_state)
49279 : {
49280 : ae_int_t v0;
49281 : ae_int_t p;
49282 : ae_int_t result;
49283 :
49284 :
49285 0 : result = 0;
49286 0 : p = 1;
49287 : for(;;)
49288 : {
49289 :
49290 : /*
49291 : * Read 7 bits of V, use the 8th bit as a flag which tells us whether
49292 : * subsequent 7-bit packets follow.
49293 : */
49294 0 : v0 = buf->ptr.p_ubyte[*(offs)];
49295 0 : *offs = *offs+1;
49296 0 : result = result+v0%128*p;
49297 0 : if( v0<128 )
49298 : {
49299 0 : break;
49300 : }
49301 0 : p = p*128;
49302 : }
49303 0 : return result;
49304 : }
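/*
 * Illustrative sketch, not part of ALGLIB: StreamUInt/UnstreamUInt implement
 * a base-128 varint, 7 payload bits per byte with the high bit flagging that
 * more bytes follow. Hence 0..127 take one byte, 128..16383 take two, and so
 * on, matching ComputeCompressedUIntSize(). A self-contained round-trip over
 * a plain byte buffer (hypothetical helper name):
 */
static ae_int_t example_varint_roundtrip(ae_int_t v)
{
    unsigned char buf[16];
    ae_int_t offs;
    ae_int_t b;
    ae_int_t r;
    ae_int_t p;

    /* encode: emit 7-bit groups, least significant first */
    offs = 0;
    do
    {
        b = v%128;
        v = v/128;
        buf[offs] = (unsigned char)(v>0 ? b+128 : b); /* high bit = continuation */
        offs = offs+1;
    }
    while(v>0);

    /* decode: accumulate groups scaled by powers of 128 */
    offs = 0;
    r = 0;
    p = 1;
    do
    {
        b = buf[offs];
        offs = offs+1;
        r = r+b%128*p;
        p = p*128;
    }
    while(b>=128);
    return r;  /* equals the original V for any V>=0 */
}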
49305 :
49306 :
49307 : /*************************************************************************
49308 : This function stores compressed floating point number to byte array at
49309 : location Offs and increments Offs by number of bytes being stored.
49310 :
49311 : Either 8-bit mantissa or 16-bit mantissa is used. The exponent is always
49312 : 7 bits of exponent + sign. Values which do not fit into exponent range are
49313 : truncated to fit.
49314 :
49315 : -- ALGLIB --
49316 : Copyright 22.07.2019 by Bochkanov Sergey
49317 : *************************************************************************/
49318 0 : static void dforest_streamfloat(ae_vector* buf,
49319 : ae_bool usemantissa8,
49320 : ae_int_t* offs,
49321 : double v,
49322 : ae_state *_state)
49323 : {
49324 : ae_int_t signbit;
49325 : ae_int_t e;
49326 : ae_int_t m;
49327 : double twopow30;
49328 : double twopowm30;
49329 : double twopow10;
49330 : double twopowm10;
49331 :
49332 :
49333 0 : ae_assert(ae_isfinite(v, _state), "StreamFloat: V is not finite number", _state);
49334 :
49335 : /*
49336 : * Special case: zero
49337 : */
49338 0 : if( v==0.0 )
49339 : {
49340 0 : if( usemantissa8 )
49341 : {
49342 0 : buf->ptr.p_ubyte[*offs+0] = (unsigned char)(0);
49343 0 : buf->ptr.p_ubyte[*offs+1] = (unsigned char)(0);
49344 0 : *offs = *offs+2;
49345 : }
49346 : else
49347 : {
49348 0 : buf->ptr.p_ubyte[*offs+0] = (unsigned char)(0);
49349 0 : buf->ptr.p_ubyte[*offs+1] = (unsigned char)(0);
49350 0 : buf->ptr.p_ubyte[*offs+2] = (unsigned char)(0);
49351 0 : *offs = *offs+3;
49352 : }
49353 0 : return;
49354 : }
49355 :
49356 : /*
49357 : * Handle sign
49358 : */
49359 0 : signbit = 0;
49360 0 : if( v<0.0 )
49361 : {
49362 0 : v = -v;
49363 0 : signbit = 128;
49364 : }
49365 :
49366 : /*
49367 : * Compute exponent
49368 : */
49369 0 : twopow30 = (double)(1073741824);
49370 0 : twopow10 = (double)(1024);
49371 0 : twopowm30 = 1.0/twopow30;
49372 0 : twopowm10 = 1.0/twopow10;
49373 0 : e = 0;
49374 0 : while(v>=twopow30)
49375 : {
49376 0 : v = v*twopowm30;
49377 0 : e = e+30;
49378 : }
49379 0 : while(v>=twopow10)
49380 : {
49381 0 : v = v*twopowm10;
49382 0 : e = e+10;
49383 : }
49384 0 : while(v>=1.0)
49385 : {
49386 0 : v = v*0.5;
49387 0 : e = e+1;
49388 : }
49389 0 : while(v<twopowm30)
49390 : {
49391 0 : v = v*twopow30;
49392 0 : e = e-30;
49393 : }
49394 0 : while(v<twopowm10)
49395 : {
49396 0 : v = v*twopow10;
49397 0 : e = e-10;
49398 : }
49399 0 : while(v<0.5)
49400 : {
49401 0 : v = v*2;
49402 0 : e = e-1;
49403 : }
49404 0 : ae_assert(v>=0.5&&v<1.0, "StreamFloat: integrity check failed", _state);
49405 :
49406 : /*
49407 : * Handle exponent underflow/overflow
49408 : */
49409 0 : if( e<-63 )
49410 : {
49411 0 : signbit = 0;
49412 0 : e = 0;
49413 0 : v = (double)(0);
49414 : }
49415 0 : if( e>63 )
49416 : {
49417 0 : e = 63;
49418 0 : v = 1.0;
49419 : }
49420 :
49421 : /*
49422 : * Save to stream
49423 : */
49424 0 : if( usemantissa8 )
49425 : {
49426 0 : m = ae_round(v*256, _state);
49427 0 : if( m==256 )
49428 : {
49429 0 : m = m/2;
49430 0 : e = ae_minint(e+1, 63, _state);
49431 : }
49432 0 : buf->ptr.p_ubyte[*offs+0] = (unsigned char)(e+64+signbit);
49433 0 : buf->ptr.p_ubyte[*offs+1] = (unsigned char)(m);
49434 0 : *offs = *offs+2;
49435 : }
49436 : else
49437 : {
49438 0 : m = ae_round(v*65536, _state);
49439 0 : if( m==65536 )
49440 : {
49441 0 : m = m/2;
49442 0 : e = ae_minint(e+1, 63, _state);
49443 : }
49444 0 : buf->ptr.p_ubyte[*offs+0] = (unsigned char)(e+64+signbit);
49445 0 : buf->ptr.p_ubyte[*offs+1] = (unsigned char)(m%256);
49446 0 : buf->ptr.p_ubyte[*offs+2] = (unsigned char)(m/256);
49447 0 : *offs = *offs+3;
49448 : }
49449 : }
49450 :
49451 :
49452 : /*************************************************************************
49453 : This function reads a compressed floating point number from the byte array
49454 : starting at location Offs and advances Offs by the number of bytes read.
49455 :
49456 : Either an 8-bit or a 16-bit mantissa is used; the leading byte packs a sign
49457 : bit together with a 7-bit biased exponent, mirroring the format written by
49458 : StreamFloat().
49460 :
49461 : -- ALGLIB --
49462 : Copyright 22.07.2019 by Bochkanov Sergey
49463 : *************************************************************************/
49464 0 : static double dforest_unstreamfloat(ae_vector* buf,
49465 : ae_bool usemantissa8,
49466 : ae_int_t* offs,
49467 : ae_state *_state)
49468 : {
49469 : ae_int_t e;
49470 : double v;
49471 : double inv256;
49472 : double result;
49473 :
49474 :
49475 :
49476 : /*
49477 : * Read from stream
49478 : */
49479 0 : inv256 = 1.0/256.0;
49480 0 : if( usemantissa8 )
49481 : {
49482 0 : e = buf->ptr.p_ubyte[*offs+0];
49483 0 : v = buf->ptr.p_ubyte[*offs+1]*inv256;
49484 0 : *offs = *offs+2;
49485 : }
49486 : else
49487 : {
49488 0 : e = buf->ptr.p_ubyte[*offs+0];
49489 0 : v = (buf->ptr.p_ubyte[*offs+1]*inv256+buf->ptr.p_ubyte[*offs+2])*inv256;
49490 0 : *offs = *offs+3;
49491 : }
49492 :
49493 : /*
49494 : * Decode
49495 : */
49496 0 : if( e>128 )
49497 : {
49498 0 : v = -v;
49499 0 : e = e-128;
49500 : }
49501 0 : e = e-64;
49502 0 : result = dforest_xfastpow((double)(2), e, _state)*v;
49503 0 : return result;
49504 : }
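/*
 * Illustrative sketch, not part of ALGLIB: the compressed float layout used
 * by StreamFloat/UnstreamFloat. The leading byte packs SignBit(128)+64+E with
 * E in [-63,+63]; the mantissa M occupies the remaining 1 or 2 bytes and
 * represents a value in [0.5,1.0) as M/256 (or M/65536), so the decoded value
 * is sign*(M/2^bits)*2^E. A hypothetical decoder for the 8-bit-mantissa case,
 * assuming ldexp() from <math.h> is available:
 */
static double example_decode_float8(const unsigned char* b)
{
    int e = b[0];
    double v = b[1]/256.0;   /* mantissa scaled to [0,1) */
    double sign = 1.0;
    if( e>128 )              /* bit 7 of the leading byte is the sign flag */
    {
        sign = -1.0;
        e = e-128;
    }
    e = e-64;                /* remove the exponent bias */
    return sign*ldexp(v, e); /* v*2^e */
}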
49505 :
49506 :
49507 : /*************************************************************************
49508 : Classification error
49509 : *************************************************************************/
49510 0 : static ae_int_t dforest_dfclserror(decisionforest* df,
49511 : /* Real */ ae_matrix* xy,
49512 : ae_int_t npoints,
49513 : ae_state *_state)
49514 : {
49515 : ae_frame _frame_block;
49516 : ae_vector x;
49517 : ae_vector y;
49518 : ae_int_t i;
49519 : ae_int_t j;
49520 : ae_int_t k;
49521 : ae_int_t tmpi;
49522 : ae_int_t result;
49523 :
49524 0 : ae_frame_make(_state, &_frame_block);
49525 0 : memset(&x, 0, sizeof(x));
49526 0 : memset(&y, 0, sizeof(y));
49527 0 : ae_vector_init(&x, 0, DT_REAL, _state, ae_true);
49528 0 : ae_vector_init(&y, 0, DT_REAL, _state, ae_true);
49529 :
49530 0 : if( df->nclasses<=1 )
49531 : {
49532 0 : result = 0;
49533 0 : ae_frame_leave(_state);
49534 0 : return result;
49535 : }
49536 0 : ae_vector_set_length(&x, df->nvars-1+1, _state);
49537 0 : ae_vector_set_length(&y, df->nclasses-1+1, _state);
49538 0 : result = 0;
49539 0 : for(i=0; i<=npoints-1; i++)
49540 : {
49541 0 : ae_v_move(&x.ptr.p_double[0], 1, &xy->ptr.pp_double[i][0], 1, ae_v_len(0,df->nvars-1));
49542 0 : dfprocess(df, &x, &y, _state);
49543 0 : k = ae_round(xy->ptr.pp_double[i][df->nvars], _state);
49544 0 : tmpi = 0;
49545 0 : for(j=1; j<=df->nclasses-1; j++)
49546 : {
49547 0 : if( ae_fp_greater(y.ptr.p_double[j],y.ptr.p_double[tmpi]) )
49548 : {
49549 0 : tmpi = j;
49550 : }
49551 : }
49552 0 : if( tmpi!=k )
49553 : {
49554 0 : result = result+1;
49555 : }
49556 : }
49557 0 : ae_frame_leave(_state);
49558 0 : return result;
49559 : }
49560 :
49561 :
49562 : /*************************************************************************
49563 : Internal subroutine for processing one decision tree stored in uncompressed
49564 : format starting at SubtreeRoot (this index points to the header of the tree,
49565 : not its first node). First node being processed is located at NodeOffs.
49566 : *************************************************************************/
49567 0 : static void dforest_dfprocessinternaluncompressed(decisionforest* df,
49568 : ae_int_t subtreeroot,
49569 : ae_int_t nodeoffs,
49570 : /* Real */ ae_vector* x,
49571 : /* Real */ ae_vector* y,
49572 : ae_state *_state)
49573 : {
49574 : ae_int_t idx;
49575 :
49576 :
49577 0 : ae_assert(df->forestformat==dforest_dfuncompressedv0, "DFProcessInternal: unexpected forest format", _state);
49578 :
49579 : /*
49580 : * Navigate through the tree
49581 : */
49582 : for(;;)
49583 : {
49584 0 : if( ae_fp_eq(df->trees.ptr.p_double[nodeoffs],(double)(-1)) )
49585 : {
49586 0 : if( df->nclasses==1 )
49587 : {
49588 0 : y->ptr.p_double[0] = y->ptr.p_double[0]+df->trees.ptr.p_double[nodeoffs+1];
49589 : }
49590 : else
49591 : {
49592 0 : idx = ae_round(df->trees.ptr.p_double[nodeoffs+1], _state);
49593 0 : y->ptr.p_double[idx] = y->ptr.p_double[idx]+1;
49594 : }
49595 0 : break;
49596 : }
49597 0 : if( x->ptr.p_double[ae_round(df->trees.ptr.p_double[nodeoffs], _state)]<df->trees.ptr.p_double[nodeoffs+1] )
49598 : {
49599 0 : nodeoffs = nodeoffs+dforest_innernodewidth;
49600 : }
49601 : else
49602 : {
49603 0 : nodeoffs = subtreeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);
49604 : }
49605 : }
49606 0 : }
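/*
 * Illustrative sketch, not part of ALGLIB: layout of an uncompressed tree as
 * implied by the traversal above. Trees[SubtreeRoot] stores the tree size;
 * an inner node spans dforest_innernodewidth slots (variable index, split
 * value, offset of branch #1 relative to the tree header) and a leaf spans
 * dforest_leafnodewidth slots (-1 marker, payload). One traversal step,
 * assuming NodeOffs points at an inner node (hypothetical helper):
 */
static ae_int_t example_traversal_step(const decisionforest* df,
     ae_int_t subtreeroot,
     ae_int_t nodeoffs,
     /* Real */ const ae_vector* x,
     ae_state *_state)
{
    ae_int_t varidx = ae_round(df->trees.ptr.p_double[nodeoffs], _state);
    if( x->ptr.p_double[varidx]<df->trees.ptr.p_double[nodeoffs+1] )
    {
        return nodeoffs+dforest_innernodewidth;  /* branch #0 is stored inline */
    }
    return subtreeroot+ae_round(df->trees.ptr.p_double[nodeoffs+2], _state);  /* jump to branch #1 */
}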
49607 :
49608 :
49609 : /*************************************************************************
49610 : Internal subroutine for processing one decision tree stored in compressed
49611 : format starting at Offs (this index points to the first node of the tree,
49612 : right past the header field).
49613 : *************************************************************************/
49614 0 : static void dforest_dfprocessinternalcompressed(decisionforest* df,
49615 : ae_int_t offs,
49616 : /* Real */ ae_vector* x,
49617 : /* Real */ ae_vector* y,
49618 : ae_state *_state)
49619 : {
49620 : ae_int_t leafindicator;
49621 : ae_int_t varidx;
49622 : double splitval;
49623 : ae_int_t jmplen;
49624 : double leafval;
49625 : ae_int_t leafcls;
49626 :
49627 :
49628 0 : ae_assert(df->forestformat==dforest_dfcompressedv0, "DFProcessInternal: unexpected forest format", _state);
49629 :
49630 : /*
49631 : * Navigate through the tree
49632 : */
49633 0 : leafindicator = 2*df->nvars;
49634 : for(;;)
49635 : {
49636 :
49637 : /*
49638 : * Read variable idx
49639 : */
49640 0 : varidx = dforest_unstreamuint(&df->trees8, &offs, _state);
49641 :
49642 : /*
49643 : * Is it leaf?
49644 : */
49645 0 : if( varidx==leafindicator )
49646 : {
49647 0 : if( df->nclasses==1 )
49648 : {
49649 :
49650 : /*
49651 : * Regression forest
49652 : */
49653 0 : leafval = dforest_unstreamfloat(&df->trees8, df->usemantissa8, &offs, _state);
49654 0 : y->ptr.p_double[0] = y->ptr.p_double[0]+leafval;
49655 : }
49656 : else
49657 : {
49658 :
49659 : /*
49660 : * Classification forest
49661 : */
49662 0 : leafcls = dforest_unstreamuint(&df->trees8, &offs, _state);
49663 0 : y->ptr.p_double[leafcls] = y->ptr.p_double[leafcls]+1;
49664 : }
49665 0 : break;
49666 : }
49667 :
49668 : /*
49669 : * Process node
49670 : */
49671 0 : splitval = dforest_unstreamfloat(&df->trees8, df->usemantissa8, &offs, _state);
49672 0 : jmplen = dforest_unstreamuint(&df->trees8, &offs, _state);
49673 0 : if( varidx<df->nvars )
49674 : {
49675 :
49676 : /*
49677 : * The split rule is "if VAR<VAL then BRANCH0 else BRANCH1"
49678 : */
49679 0 : if( x->ptr.p_double[varidx]>=splitval )
49680 : {
49681 0 : offs = offs+jmplen;
49682 : }
49683 : }
49684 : else
49685 : {
49686 :
49687 : /*
49688 : * The split rule is "if VAR>=VAL then BRANCH0 else BRANCH1"
49689 : */
49690 0 : varidx = varidx-df->nvars;
49691 0 : if( x->ptr.p_double[varidx]<splitval )
49692 : {
49693 0 : offs = offs+jmplen;
49694 : }
49695 : }
49696 : }
49697 0 : }
49698 :
49699 :
49700 : /*************************************************************************
49701 : Fast Pow
49702 :
49703 : -- ALGLIB --
49704 : Copyright 24.08.2009 by Bochkanov Sergey
49705 : *************************************************************************/
49706 0 : static double dforest_xfastpow(double r, ae_int_t n, ae_state *_state)
49707 : {
49708 : double result;
49709 :
49710 :
49711 0 : result = (double)(0);
49712 0 : if( n>0 )
49713 : {
49714 0 : if( n%2==0 )
49715 : {
49716 0 : result = dforest_xfastpow(r, n/2, _state);
49717 0 : result = result*result;
49718 : }
49719 : else
49720 : {
49721 0 : result = r*dforest_xfastpow(r, n-1, _state);
49722 : }
49723 0 : return result;
49724 : }
49725 0 : if( n==0 )
49726 : {
49727 0 : result = (double)(1);
49728 : }
49729 0 : if( n<0 )
49730 : {
49731 0 : result = dforest_xfastpow(1/r, -n, _state);
49732 : }
49733 0 : return result;
49734 : }
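/*
 * Illustrative sketch, not part of ALGLIB: XFastPow is exponentiation by
 * squaring, needing O(log N) multiplications instead of N-1; the N<0 case
 * reduces to (1/R)^(-N) exactly as above. An equivalent iterative form for
 * N>=0 (hypothetical helper):
 */
static double example_ipow(double r, ae_int_t n)
{
    double result = 1.0;
    while(n>0)
    {
        if( n%2==1 )
        {
            result = result*r;  /* fold in the current binary digit of N */
        }
        r = r*r;                /* square the base */
        n = n/2;
    }
    return result;
}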
49735 :
49736 :
49737 0 : void _decisionforestbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic)
49738 : {
49739 0 : decisionforestbuilder *p = (decisionforestbuilder*)_p;
49740 0 : ae_touch_ptr((void*)p);
49741 0 : ae_vector_init(&p->dsdata, 0, DT_REAL, _state, make_automatic);
49742 0 : ae_vector_init(&p->dsrval, 0, DT_REAL, _state, make_automatic);
49743 0 : ae_vector_init(&p->dsival, 0, DT_INT, _state, make_automatic);
49744 0 : ae_vector_init(&p->dsmin, 0, DT_REAL, _state, make_automatic);
49745 0 : ae_vector_init(&p->dsmax, 0, DT_REAL, _state, make_automatic);
49746 0 : ae_vector_init(&p->dsbinary, 0, DT_BOOL, _state, make_automatic);
49747 0 : ae_vector_init(&p->dsctotals, 0, DT_INT, _state, make_automatic);
49748 0 : ae_shared_pool_init(&p->workpool, _state, make_automatic);
49749 0 : ae_shared_pool_init(&p->votepool, _state, make_automatic);
49750 0 : ae_shared_pool_init(&p->treepool, _state, make_automatic);
49751 0 : ae_shared_pool_init(&p->treefactory, _state, make_automatic);
49752 0 : ae_matrix_init(&p->iobmatrix, 0, 0, DT_BOOL, _state, make_automatic);
49753 0 : ae_vector_init(&p->varimpshuffle2, 0, DT_INT, _state, make_automatic);
49754 0 : }
49755 :
49756 :
49757 0 : void _decisionforestbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
49758 : {
49759 0 : decisionforestbuilder *dst = (decisionforestbuilder*)_dst;
49760 0 : decisionforestbuilder *src = (decisionforestbuilder*)_src;
49761 0 : dst->dstype = src->dstype;
49762 0 : dst->npoints = src->npoints;
49763 0 : dst->nvars = src->nvars;
49764 0 : dst->nclasses = src->nclasses;
49765 0 : ae_vector_init_copy(&dst->dsdata, &src->dsdata, _state, make_automatic);
49766 0 : ae_vector_init_copy(&dst->dsrval, &src->dsrval, _state, make_automatic);
49767 0 : ae_vector_init_copy(&dst->dsival, &src->dsival, _state, make_automatic);
49768 0 : dst->rdfalgo = src->rdfalgo;
49769 0 : dst->rdfratio = src->rdfratio;
49770 0 : dst->rdfvars = src->rdfvars;
49771 0 : dst->rdfglobalseed = src->rdfglobalseed;
49772 0 : dst->rdfsplitstrength = src->rdfsplitstrength;
49773 0 : dst->rdfimportance = src->rdfimportance;
49774 0 : ae_vector_init_copy(&dst->dsmin, &src->dsmin, _state, make_automatic);
49775 0 : ae_vector_init_copy(&dst->dsmax, &src->dsmax, _state, make_automatic);
49776 0 : ae_vector_init_copy(&dst->dsbinary, &src->dsbinary, _state, make_automatic);
49777 0 : dst->dsravg = src->dsravg;
49778 0 : ae_vector_init_copy(&dst->dsctotals, &src->dsctotals, _state, make_automatic);
49779 0 : dst->rdfprogress = src->rdfprogress;
49780 0 : dst->rdftotal = src->rdftotal;
49781 0 : ae_shared_pool_init_copy(&dst->workpool, &src->workpool, _state, make_automatic);
49782 0 : ae_shared_pool_init_copy(&dst->votepool, &src->votepool, _state, make_automatic);
49783 0 : ae_shared_pool_init_copy(&dst->treepool, &src->treepool, _state, make_automatic);
49784 0 : ae_shared_pool_init_copy(&dst->treefactory, &src->treefactory, _state, make_automatic);
49785 0 : dst->neediobmatrix = src->neediobmatrix;
49786 0 : ae_matrix_init_copy(&dst->iobmatrix, &src->iobmatrix, _state, make_automatic);
49787 0 : ae_vector_init_copy(&dst->varimpshuffle2, &src->varimpshuffle2, _state, make_automatic);
49788 0 : }
49789 :
49790 :
49791 0 : void _decisionforestbuilder_clear(void* _p)
49792 : {
49793 0 : decisionforestbuilder *p = (decisionforestbuilder*)_p;
49794 0 : ae_touch_ptr((void*)p);
49795 0 : ae_vector_clear(&p->dsdata);
49796 0 : ae_vector_clear(&p->dsrval);
49797 0 : ae_vector_clear(&p->dsival);
49798 0 : ae_vector_clear(&p->dsmin);
49799 0 : ae_vector_clear(&p->dsmax);
49800 0 : ae_vector_clear(&p->dsbinary);
49801 0 : ae_vector_clear(&p->dsctotals);
49802 0 : ae_shared_pool_clear(&p->workpool);
49803 0 : ae_shared_pool_clear(&p->votepool);
49804 0 : ae_shared_pool_clear(&p->treepool);
49805 0 : ae_shared_pool_clear(&p->treefactory);
49806 0 : ae_matrix_clear(&p->iobmatrix);
49807 0 : ae_vector_clear(&p->varimpshuffle2);
49808 0 : }
49809 :
49810 :
49811 0 : void _decisionforestbuilder_destroy(void* _p)
49812 : {
49813 0 : decisionforestbuilder *p = (decisionforestbuilder*)_p;
49814 0 : ae_touch_ptr((void*)p);
49815 0 : ae_vector_destroy(&p->dsdata);
49816 0 : ae_vector_destroy(&p->dsrval);
49817 0 : ae_vector_destroy(&p->dsival);
49818 0 : ae_vector_destroy(&p->dsmin);
49819 0 : ae_vector_destroy(&p->dsmax);
49820 0 : ae_vector_destroy(&p->dsbinary);
49821 0 : ae_vector_destroy(&p->dsctotals);
49822 0 : ae_shared_pool_destroy(&p->workpool);
49823 0 : ae_shared_pool_destroy(&p->votepool);
49824 0 : ae_shared_pool_destroy(&p->treepool);
49825 0 : ae_shared_pool_destroy(&p->treefactory);
49826 0 : ae_matrix_destroy(&p->iobmatrix);
49827 0 : ae_vector_destroy(&p->varimpshuffle2);
49828 0 : }
49829 :
49830 :
49831 0 : void _dfworkbuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
49832 : {
49833 0 : dfworkbuf *p = (dfworkbuf*)_p;
49834 0 : ae_touch_ptr((void*)p);
49835 0 : ae_vector_init(&p->classpriors, 0, DT_INT, _state, make_automatic);
49836 0 : ae_vector_init(&p->varpool, 0, DT_INT, _state, make_automatic);
49837 0 : ae_vector_init(&p->trnset, 0, DT_INT, _state, make_automatic);
49838 0 : ae_vector_init(&p->trnlabelsr, 0, DT_REAL, _state, make_automatic);
49839 0 : ae_vector_init(&p->trnlabelsi, 0, DT_INT, _state, make_automatic);
49840 0 : ae_vector_init(&p->oobset, 0, DT_INT, _state, make_automatic);
49841 0 : ae_vector_init(&p->ooblabelsr, 0, DT_REAL, _state, make_automatic);
49842 0 : ae_vector_init(&p->ooblabelsi, 0, DT_INT, _state, make_automatic);
49843 0 : ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
49844 0 : ae_vector_init(&p->curvals, 0, DT_REAL, _state, make_automatic);
49845 0 : ae_vector_init(&p->bestvals, 0, DT_REAL, _state, make_automatic);
49846 0 : ae_vector_init(&p->tmp0i, 0, DT_INT, _state, make_automatic);
49847 0 : ae_vector_init(&p->tmp1i, 0, DT_INT, _state, make_automatic);
49848 0 : ae_vector_init(&p->tmp0r, 0, DT_REAL, _state, make_automatic);
49849 0 : ae_vector_init(&p->tmp1r, 0, DT_REAL, _state, make_automatic);
49850 0 : ae_vector_init(&p->tmp2r, 0, DT_REAL, _state, make_automatic);
49851 0 : ae_vector_init(&p->tmp3r, 0, DT_REAL, _state, make_automatic);
49852 0 : ae_vector_init(&p->tmpnrms2, 0, DT_INT, _state, make_automatic);
49853 0 : ae_vector_init(&p->classtotals0, 0, DT_INT, _state, make_automatic);
49854 0 : ae_vector_init(&p->classtotals1, 0, DT_INT, _state, make_automatic);
49855 0 : ae_vector_init(&p->classtotals01, 0, DT_INT, _state, make_automatic);
49856 0 : }
49857 :
49858 :
49859 0 : void _dfworkbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
49860 : {
49861 0 : dfworkbuf *dst = (dfworkbuf*)_dst;
49862 0 : dfworkbuf *src = (dfworkbuf*)_src;
49863 0 : ae_vector_init_copy(&dst->classpriors, &src->classpriors, _state, make_automatic);
49864 0 : ae_vector_init_copy(&dst->varpool, &src->varpool, _state, make_automatic);
49865 0 : dst->varpoolsize = src->varpoolsize;
49866 0 : ae_vector_init_copy(&dst->trnset, &src->trnset, _state, make_automatic);
49867 0 : dst->trnsize = src->trnsize;
49868 0 : ae_vector_init_copy(&dst->trnlabelsr, &src->trnlabelsr, _state, make_automatic);
49869 0 : ae_vector_init_copy(&dst->trnlabelsi, &src->trnlabelsi, _state, make_automatic);
49870 0 : ae_vector_init_copy(&dst->oobset, &src->oobset, _state, make_automatic);
49871 0 : dst->oobsize = src->oobsize;
49872 0 : ae_vector_init_copy(&dst->ooblabelsr, &src->ooblabelsr, _state, make_automatic);
49873 0 : ae_vector_init_copy(&dst->ooblabelsi, &src->ooblabelsi, _state, make_automatic);
49874 0 : ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
49875 0 : ae_vector_init_copy(&dst->curvals, &src->curvals, _state, make_automatic);
49876 0 : ae_vector_init_copy(&dst->bestvals, &src->bestvals, _state, make_automatic);
49877 0 : ae_vector_init_copy(&dst->tmp0i, &src->tmp0i, _state, make_automatic);
49878 0 : ae_vector_init_copy(&dst->tmp1i, &src->tmp1i, _state, make_automatic);
49879 0 : ae_vector_init_copy(&dst->tmp0r, &src->tmp0r, _state, make_automatic);
49880 0 : ae_vector_init_copy(&dst->tmp1r, &src->tmp1r, _state, make_automatic);
49881 0 : ae_vector_init_copy(&dst->tmp2r, &src->tmp2r, _state, make_automatic);
49882 0 : ae_vector_init_copy(&dst->tmp3r, &src->tmp3r, _state, make_automatic);
49883 0 : ae_vector_init_copy(&dst->tmpnrms2, &src->tmpnrms2, _state, make_automatic);
49884 0 : ae_vector_init_copy(&dst->classtotals0, &src->classtotals0, _state, make_automatic);
49885 0 : ae_vector_init_copy(&dst->classtotals1, &src->classtotals1, _state, make_automatic);
49886 0 : ae_vector_init_copy(&dst->classtotals01, &src->classtotals01, _state, make_automatic);
49887 0 : }
49888 :
49889 :
49890 0 : void _dfworkbuf_clear(void* _p)
49891 : {
49892 0 : dfworkbuf *p = (dfworkbuf*)_p;
49893 0 : ae_touch_ptr((void*)p);
49894 0 : ae_vector_clear(&p->classpriors);
49895 0 : ae_vector_clear(&p->varpool);
49896 0 : ae_vector_clear(&p->trnset);
49897 0 : ae_vector_clear(&p->trnlabelsr);
49898 0 : ae_vector_clear(&p->trnlabelsi);
49899 0 : ae_vector_clear(&p->oobset);
49900 0 : ae_vector_clear(&p->ooblabelsr);
49901 0 : ae_vector_clear(&p->ooblabelsi);
49902 0 : ae_vector_clear(&p->treebuf);
49903 0 : ae_vector_clear(&p->curvals);
49904 0 : ae_vector_clear(&p->bestvals);
49905 0 : ae_vector_clear(&p->tmp0i);
49906 0 : ae_vector_clear(&p->tmp1i);
49907 0 : ae_vector_clear(&p->tmp0r);
49908 0 : ae_vector_clear(&p->tmp1r);
49909 0 : ae_vector_clear(&p->tmp2r);
49910 0 : ae_vector_clear(&p->tmp3r);
49911 0 : ae_vector_clear(&p->tmpnrms2);
49912 0 : ae_vector_clear(&p->classtotals0);
49913 0 : ae_vector_clear(&p->classtotals1);
49914 0 : ae_vector_clear(&p->classtotals01);
49915 0 : }
49916 :
49917 :
49918 0 : void _dfworkbuf_destroy(void* _p)
49919 : {
49920 0 : dfworkbuf *p = (dfworkbuf*)_p;
49921 0 : ae_touch_ptr((void*)p);
49922 0 : ae_vector_destroy(&p->classpriors);
49923 0 : ae_vector_destroy(&p->varpool);
49924 0 : ae_vector_destroy(&p->trnset);
49925 0 : ae_vector_destroy(&p->trnlabelsr);
49926 0 : ae_vector_destroy(&p->trnlabelsi);
49927 0 : ae_vector_destroy(&p->oobset);
49928 0 : ae_vector_destroy(&p->ooblabelsr);
49929 0 : ae_vector_destroy(&p->ooblabelsi);
49930 0 : ae_vector_destroy(&p->treebuf);
49931 0 : ae_vector_destroy(&p->curvals);
49932 0 : ae_vector_destroy(&p->bestvals);
49933 0 : ae_vector_destroy(&p->tmp0i);
49934 0 : ae_vector_destroy(&p->tmp1i);
49935 0 : ae_vector_destroy(&p->tmp0r);
49936 0 : ae_vector_destroy(&p->tmp1r);
49937 0 : ae_vector_destroy(&p->tmp2r);
49938 0 : ae_vector_destroy(&p->tmp3r);
49939 0 : ae_vector_destroy(&p->tmpnrms2);
49940 0 : ae_vector_destroy(&p->classtotals0);
49941 0 : ae_vector_destroy(&p->classtotals1);
49942 0 : ae_vector_destroy(&p->classtotals01);
49943 0 : }
49944 :
49945 :
49946 0 : void _dfvotebuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
49947 : {
49948 0 : dfvotebuf *p = (dfvotebuf*)_p;
49949 0 : ae_touch_ptr((void*)p);
49950 0 : ae_vector_init(&p->trntotals, 0, DT_REAL, _state, make_automatic);
49951 0 : ae_vector_init(&p->oobtotals, 0, DT_REAL, _state, make_automatic);
49952 0 : ae_vector_init(&p->trncounts, 0, DT_INT, _state, make_automatic);
49953 0 : ae_vector_init(&p->oobcounts, 0, DT_INT, _state, make_automatic);
49954 0 : ae_vector_init(&p->giniimportances, 0, DT_REAL, _state, make_automatic);
49955 0 : }
49956 :
49957 :
49958 0 : void _dfvotebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
49959 : {
49960 0 : dfvotebuf *dst = (dfvotebuf*)_dst;
49961 0 : dfvotebuf *src = (dfvotebuf*)_src;
49962 0 : ae_vector_init_copy(&dst->trntotals, &src->trntotals, _state, make_automatic);
49963 0 : ae_vector_init_copy(&dst->oobtotals, &src->oobtotals, _state, make_automatic);
49964 0 : ae_vector_init_copy(&dst->trncounts, &src->trncounts, _state, make_automatic);
49965 0 : ae_vector_init_copy(&dst->oobcounts, &src->oobcounts, _state, make_automatic);
49966 0 : ae_vector_init_copy(&dst->giniimportances, &src->giniimportances, _state, make_automatic);
49967 0 : }
49968 :
49969 :
49970 0 : void _dfvotebuf_clear(void* _p)
49971 : {
49972 0 : dfvotebuf *p = (dfvotebuf*)_p;
49973 0 : ae_touch_ptr((void*)p);
49974 0 : ae_vector_clear(&p->trntotals);
49975 0 : ae_vector_clear(&p->oobtotals);
49976 0 : ae_vector_clear(&p->trncounts);
49977 0 : ae_vector_clear(&p->oobcounts);
49978 0 : ae_vector_clear(&p->giniimportances);
49979 0 : }
49980 :
49981 :
49982 0 : void _dfvotebuf_destroy(void* _p)
49983 : {
49984 0 : dfvotebuf *p = (dfvotebuf*)_p;
49985 0 : ae_touch_ptr((void*)p);
49986 0 : ae_vector_destroy(&p->trntotals);
49987 0 : ae_vector_destroy(&p->oobtotals);
49988 0 : ae_vector_destroy(&p->trncounts);
49989 0 : ae_vector_destroy(&p->oobcounts);
49990 0 : ae_vector_destroy(&p->giniimportances);
49991 0 : }
49992 :
49993 :
49994 0 : void _dfpermimpbuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
49995 : {
49996 0 : dfpermimpbuf *p = (dfpermimpbuf*)_p;
49997 0 : ae_touch_ptr((void*)p);
49998 0 : ae_vector_init(&p->losses, 0, DT_REAL, _state, make_automatic);
49999 0 : ae_vector_init(&p->xraw, 0, DT_REAL, _state, make_automatic);
50000 0 : ae_vector_init(&p->xdist, 0, DT_REAL, _state, make_automatic);
50001 0 : ae_vector_init(&p->xcur, 0, DT_REAL, _state, make_automatic);
50002 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
50003 0 : ae_vector_init(&p->yv, 0, DT_REAL, _state, make_automatic);
50004 0 : ae_vector_init(&p->targety, 0, DT_REAL, _state, make_automatic);
50005 0 : ae_vector_init(&p->startnodes, 0, DT_INT, _state, make_automatic);
50006 0 : }
50007 :
50008 :
50009 0 : void _dfpermimpbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50010 : {
50011 0 : dfpermimpbuf *dst = (dfpermimpbuf*)_dst;
50012 0 : dfpermimpbuf *src = (dfpermimpbuf*)_src;
50013 0 : ae_vector_init_copy(&dst->losses, &src->losses, _state, make_automatic);
50014 0 : ae_vector_init_copy(&dst->xraw, &src->xraw, _state, make_automatic);
50015 0 : ae_vector_init_copy(&dst->xdist, &src->xdist, _state, make_automatic);
50016 0 : ae_vector_init_copy(&dst->xcur, &src->xcur, _state, make_automatic);
50017 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
50018 0 : ae_vector_init_copy(&dst->yv, &src->yv, _state, make_automatic);
50019 0 : ae_vector_init_copy(&dst->targety, &src->targety, _state, make_automatic);
50020 0 : ae_vector_init_copy(&dst->startnodes, &src->startnodes, _state, make_automatic);
50021 0 : }
50022 :
50023 :
50024 0 : void _dfpermimpbuf_clear(void* _p)
50025 : {
50026 0 : dfpermimpbuf *p = (dfpermimpbuf*)_p;
50027 0 : ae_touch_ptr((void*)p);
50028 0 : ae_vector_clear(&p->losses);
50029 0 : ae_vector_clear(&p->xraw);
50030 0 : ae_vector_clear(&p->xdist);
50031 0 : ae_vector_clear(&p->xcur);
50032 0 : ae_vector_clear(&p->y);
50033 0 : ae_vector_clear(&p->yv);
50034 0 : ae_vector_clear(&p->targety);
50035 0 : ae_vector_clear(&p->startnodes);
50036 0 : }
50037 :
50038 :
50039 0 : void _dfpermimpbuf_destroy(void* _p)
50040 : {
50041 0 : dfpermimpbuf *p = (dfpermimpbuf*)_p;
50042 0 : ae_touch_ptr((void*)p);
50043 0 : ae_vector_destroy(&p->losses);
50044 0 : ae_vector_destroy(&p->xraw);
50045 0 : ae_vector_destroy(&p->xdist);
50046 0 : ae_vector_destroy(&p->xcur);
50047 0 : ae_vector_destroy(&p->y);
50048 0 : ae_vector_destroy(&p->yv);
50049 0 : ae_vector_destroy(&p->targety);
50050 0 : ae_vector_destroy(&p->startnodes);
50051 0 : }
50052 :
50053 :
50054 0 : void _dftreebuf_init(void* _p, ae_state *_state, ae_bool make_automatic)
50055 : {
50056 0 : dftreebuf *p = (dftreebuf*)_p;
50057 0 : ae_touch_ptr((void*)p);
50058 0 : ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
50059 0 : }
50060 :
50061 :
50062 0 : void _dftreebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50063 : {
50064 0 : dftreebuf *dst = (dftreebuf*)_dst;
50065 0 : dftreebuf *src = (dftreebuf*)_src;
50066 0 : ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
50067 0 : dst->treeidx = src->treeidx;
50068 0 : }
50069 :
50070 :
50071 0 : void _dftreebuf_clear(void* _p)
50072 : {
50073 0 : dftreebuf *p = (dftreebuf*)_p;
50074 0 : ae_touch_ptr((void*)p);
50075 0 : ae_vector_clear(&p->treebuf);
50076 0 : }
50077 :
50078 :
50079 0 : void _dftreebuf_destroy(void* _p)
50080 : {
50081 0 : dftreebuf *p = (dftreebuf*)_p;
50082 0 : ae_touch_ptr((void*)p);
50083 0 : ae_vector_destroy(&p->treebuf);
50084 0 : }
50085 :
50086 :
50087 0 : void _decisionforestbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
50088 : {
50089 0 : decisionforestbuffer *p = (decisionforestbuffer*)_p;
50090 0 : ae_touch_ptr((void*)p);
50091 0 : ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
50092 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
50093 0 : }
50094 :
50095 :
50096 0 : void _decisionforestbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50097 : {
50098 0 : decisionforestbuffer *dst = (decisionforestbuffer*)_dst;
50099 0 : decisionforestbuffer *src = (decisionforestbuffer*)_src;
50100 0 : ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
50101 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
50102 0 : }
50103 :
50104 :
50105 0 : void _decisionforestbuffer_clear(void* _p)
50106 : {
50107 0 : decisionforestbuffer *p = (decisionforestbuffer*)_p;
50108 0 : ae_touch_ptr((void*)p);
50109 0 : ae_vector_clear(&p->x);
50110 0 : ae_vector_clear(&p->y);
50111 0 : }
50112 :
50113 :
50114 0 : void _decisionforestbuffer_destroy(void* _p)
50115 : {
50116 0 : decisionforestbuffer *p = (decisionforestbuffer*)_p;
50117 0 : ae_touch_ptr((void*)p);
50118 0 : ae_vector_destroy(&p->x);
50119 0 : ae_vector_destroy(&p->y);
50120 0 : }
50121 :
50122 :
50123 0 : void _decisionforest_init(void* _p, ae_state *_state, ae_bool make_automatic)
50124 : {
50125 0 : decisionforest *p = (decisionforest*)_p;
50126 0 : ae_touch_ptr((void*)p);
50127 0 : ae_vector_init(&p->trees, 0, DT_REAL, _state, make_automatic);
50128 0 : _decisionforestbuffer_init(&p->buffer, _state, make_automatic);
50129 0 : ae_vector_init(&p->trees8, 0, DT_BYTE, _state, make_automatic);
50130 0 : }
50131 :
50132 :
50133 0 : void _decisionforest_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50134 : {
50135 0 : decisionforest *dst = (decisionforest*)_dst;
50136 0 : decisionforest *src = (decisionforest*)_src;
50137 0 : dst->forestformat = src->forestformat;
50138 0 : dst->usemantissa8 = src->usemantissa8;
50139 0 : dst->nvars = src->nvars;
50140 0 : dst->nclasses = src->nclasses;
50141 0 : dst->ntrees = src->ntrees;
50142 0 : dst->bufsize = src->bufsize;
50143 0 : ae_vector_init_copy(&dst->trees, &src->trees, _state, make_automatic);
50144 0 : _decisionforestbuffer_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
50145 0 : ae_vector_init_copy(&dst->trees8, &src->trees8, _state, make_automatic);
50146 0 : }
50147 :
50148 :
50149 0 : void _decisionforest_clear(void* _p)
50150 : {
50151 0 : decisionforest *p = (decisionforest*)_p;
50152 0 : ae_touch_ptr((void*)p);
50153 0 : ae_vector_clear(&p->trees);
50154 0 : _decisionforestbuffer_clear(&p->buffer);
50155 0 : ae_vector_clear(&p->trees8);
50156 0 : }
50157 :
50158 :
50159 0 : void _decisionforest_destroy(void* _p)
50160 : {
50161 0 : decisionforest *p = (decisionforest*)_p;
50162 0 : ae_touch_ptr((void*)p);
50163 0 : ae_vector_destroy(&p->trees);
50164 0 : _decisionforestbuffer_destroy(&p->buffer);
50165 0 : ae_vector_destroy(&p->trees8);
50166 0 : }
50167 :
50168 :
50169 0 : void _dfreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
50170 : {
50171 0 : dfreport *p = (dfreport*)_p;
50172 0 : ae_touch_ptr((void*)p);
50173 0 : ae_vector_init(&p->topvars, 0, DT_INT, _state, make_automatic);
50174 0 : ae_vector_init(&p->varimportances, 0, DT_REAL, _state, make_automatic);
50175 0 : }
50176 :
50177 :
50178 0 : void _dfreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50179 : {
50180 0 : dfreport *dst = (dfreport*)_dst;
50181 0 : dfreport *src = (dfreport*)_src;
50182 0 : dst->relclserror = src->relclserror;
50183 0 : dst->avgce = src->avgce;
50184 0 : dst->rmserror = src->rmserror;
50185 0 : dst->avgerror = src->avgerror;
50186 0 : dst->avgrelerror = src->avgrelerror;
50187 0 : dst->oobrelclserror = src->oobrelclserror;
50188 0 : dst->oobavgce = src->oobavgce;
50189 0 : dst->oobrmserror = src->oobrmserror;
50190 0 : dst->oobavgerror = src->oobavgerror;
50191 0 : dst->oobavgrelerror = src->oobavgrelerror;
50192 0 : ae_vector_init_copy(&dst->topvars, &src->topvars, _state, make_automatic);
50193 0 : ae_vector_init_copy(&dst->varimportances, &src->varimportances, _state, make_automatic);
50194 0 : }
50195 :
50196 :
50197 0 : void _dfreport_clear(void* _p)
50198 : {
50199 0 : dfreport *p = (dfreport*)_p;
50200 0 : ae_touch_ptr((void*)p);
50201 0 : ae_vector_clear(&p->topvars);
50202 0 : ae_vector_clear(&p->varimportances);
50203 0 : }
50204 :
50205 :
50206 0 : void _dfreport_destroy(void* _p)
50207 : {
50208 0 : dfreport *p = (dfreport*)_p;
50209 0 : ae_touch_ptr((void*)p);
50210 0 : ae_vector_destroy(&p->topvars);
50211 0 : ae_vector_destroy(&p->varimportances);
50212 0 : }
50213 :
50214 :
50215 0 : void _dfinternalbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
50216 : {
50217 0 : dfinternalbuffers *p = (dfinternalbuffers*)_p;
50218 0 : ae_touch_ptr((void*)p);
50219 0 : ae_vector_init(&p->treebuf, 0, DT_REAL, _state, make_automatic);
50220 0 : ae_vector_init(&p->idxbuf, 0, DT_INT, _state, make_automatic);
50221 0 : ae_vector_init(&p->tmpbufr, 0, DT_REAL, _state, make_automatic);
50222 0 : ae_vector_init(&p->tmpbufr2, 0, DT_REAL, _state, make_automatic);
50223 0 : ae_vector_init(&p->tmpbufi, 0, DT_INT, _state, make_automatic);
50224 0 : ae_vector_init(&p->classibuf, 0, DT_INT, _state, make_automatic);
50225 0 : ae_vector_init(&p->sortrbuf, 0, DT_REAL, _state, make_automatic);
50226 0 : ae_vector_init(&p->sortrbuf2, 0, DT_REAL, _state, make_automatic);
50227 0 : ae_vector_init(&p->sortibuf, 0, DT_INT, _state, make_automatic);
50228 0 : ae_vector_init(&p->varpool, 0, DT_INT, _state, make_automatic);
50229 0 : ae_vector_init(&p->evsbin, 0, DT_BOOL, _state, make_automatic);
50230 0 : ae_vector_init(&p->evssplits, 0, DT_REAL, _state, make_automatic);
50231 0 : }
50232 :
50233 :
50234 0 : void _dfinternalbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
50235 : {
50236 0 : dfinternalbuffers *dst = (dfinternalbuffers*)_dst;
50237 0 : dfinternalbuffers *src = (dfinternalbuffers*)_src;
50238 0 : ae_vector_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
50239 0 : ae_vector_init_copy(&dst->idxbuf, &src->idxbuf, _state, make_automatic);
50240 0 : ae_vector_init_copy(&dst->tmpbufr, &src->tmpbufr, _state, make_automatic);
50241 0 : ae_vector_init_copy(&dst->tmpbufr2, &src->tmpbufr2, _state, make_automatic);
50242 0 : ae_vector_init_copy(&dst->tmpbufi, &src->tmpbufi, _state, make_automatic);
50243 0 : ae_vector_init_copy(&dst->classibuf, &src->classibuf, _state, make_automatic);
50244 0 : ae_vector_init_copy(&dst->sortrbuf, &src->sortrbuf, _state, make_automatic);
50245 0 : ae_vector_init_copy(&dst->sortrbuf2, &src->sortrbuf2, _state, make_automatic);
50246 0 : ae_vector_init_copy(&dst->sortibuf, &src->sortibuf, _state, make_automatic);
50247 0 : ae_vector_init_copy(&dst->varpool, &src->varpool, _state, make_automatic);
50248 0 : ae_vector_init_copy(&dst->evsbin, &src->evsbin, _state, make_automatic);
50249 0 : ae_vector_init_copy(&dst->evssplits, &src->evssplits, _state, make_automatic);
50250 0 : }
50251 :
50252 :
50253 0 : void _dfinternalbuffers_clear(void* _p)
50254 : {
50255 0 : dfinternalbuffers *p = (dfinternalbuffers*)_p;
50256 0 : ae_touch_ptr((void*)p);
50257 0 : ae_vector_clear(&p->treebuf);
50258 0 : ae_vector_clear(&p->idxbuf);
50259 0 : ae_vector_clear(&p->tmpbufr);
50260 0 : ae_vector_clear(&p->tmpbufr2);
50261 0 : ae_vector_clear(&p->tmpbufi);
50262 0 : ae_vector_clear(&p->classibuf);
50263 0 : ae_vector_clear(&p->sortrbuf);
50264 0 : ae_vector_clear(&p->sortrbuf2);
50265 0 : ae_vector_clear(&p->sortibuf);
50266 0 : ae_vector_clear(&p->varpool);
50267 0 : ae_vector_clear(&p->evsbin);
50268 0 : ae_vector_clear(&p->evssplits);
50269 0 : }
50270 :
50271 :
50272 0 : void _dfinternalbuffers_destroy(void* _p)
50273 : {
50274 0 : dfinternalbuffers *p = (dfinternalbuffers*)_p;
50275 0 : ae_touch_ptr((void*)p);
50276 0 : ae_vector_destroy(&p->treebuf);
50277 0 : ae_vector_destroy(&p->idxbuf);
50278 0 : ae_vector_destroy(&p->tmpbufr);
50279 0 : ae_vector_destroy(&p->tmpbufr2);
50280 0 : ae_vector_destroy(&p->tmpbufi);
50281 0 : ae_vector_destroy(&p->classibuf);
50282 0 : ae_vector_destroy(&p->sortrbuf);
50283 0 : ae_vector_destroy(&p->sortrbuf2);
50284 0 : ae_vector_destroy(&p->sortibuf);
50285 0 : ae_vector_destroy(&p->varpool);
50286 0 : ae_vector_destroy(&p->evsbin);
50287 0 : ae_vector_destroy(&p->evssplits);
50288 0 : }
50289 :
50290 :
50291 : #endif
50292 : #if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
50293 :
50294 :
50295 : /*************************************************************************
50296 : This function creates a buffer structure which can be used to perform
50297 : parallel KNN requests.
50298 :
50299 : The KNN subpackage provides two sets of computing functions: ones which
50300 : use the internal buffer of the KNN model (these functions are
50301 : single-threaded because they use the same buffer, which cannot be shared
50302 : between threads), and ones which use an external buffer.
50303 :
50304 : This function is used to initialize such an external buffer.
50305 :
50306 : INPUT PARAMETERS
50307 : Model - KNN model which is associated with the newly created buffer
50308 :
50309 : OUTPUT PARAMETERS
50310 : Buf - external buffer.
50311 :
50312 :
50313 : IMPORTANT: the buffer object should be used only with the model which was
50314 : used to initialize it. Any attempt to use the buffer with a
50315 : different model is dangerous - you may get an integrity check
50316 : failure (exception) because the sizes of the internal arrays
50317 : do not match the dimensions of the model structure.
50318 :
50319 : -- ALGLIB --
50320 : Copyright 15.02.2019 by Bochkanov Sergey
50321 : *************************************************************************/
50322 0 : void knncreatebuffer(knnmodel* model, knnbuffer* buf, ae_state *_state)
50323 : {
50324 :
50325 0 : _knnbuffer_clear(buf);
50326 :
50327 0 : if( !model->isdummy )
50328 : {
50329 0 : kdtreecreaterequestbuffer(&model->tree, &buf->treebuf, _state);
50330 : }
50331 0 : ae_vector_set_length(&buf->x, model->nvars, _state);
50332 0 : ae_vector_set_length(&buf->y, model->nout, _state);
50333 0 : }
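/*************************************************************************
Usage sketch (illustrative, not part of the library): one external buffer
per worker thread lets a single model serve parallel requests through
knntsprocess(). The frame/buffer pattern below mirrors the one used by
knnallerrors() later in this file; model, x and y are assumed to be
prepared by the caller.

    ae_frame _frame_block;
    knnbuffer buf;
    ae_frame_make(_state, &_frame_block);
    _knnbuffer_init(&buf, _state, ae_true);      // frame-managed buffer
    knncreatebuffer(&model, &buf, _state);       // size buf for this model
    knntsprocess(&model, &buf, &x, &y, _state);  // thread-safe inference
    ae_frame_leave(_state);                      // releases the buffer
*************************************************************************/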
50334 :
50335 :
50336 : /*************************************************************************
50337 : This subroutine creates a KNNBuilder object used to train KNN models.
50338 :
50339 : By default, the new builder stores an empty dataset and reasonable
50340 : default settings. At the very least, you should specify the dataset
50341 : prior to building a KNN model. You can also tweak the settings of the
50342 : model construction algorithm (although the defaults should work well).
50343 :
50344 : The following actions are mandatory (see the usage sketch below):
50345 : * calling knnbuildersetdatasetreg() or knnbuildersetdatasetcls() to specify the dataset
50346 : * calling knnbuilderbuildknnmodel() to build a KNN model using the
50347 : current dataset and the current settings
50348 :
50349 : Additionally, you may call:
50350 : * knnbuildersetnorm() to change the norm being used
50351 :
50352 : INPUT PARAMETERS:
50353 : none
50354 :
50355 : OUTPUT PARAMETERS:
50356 : S - KNN builder
50357 :
50358 : -- ALGLIB --
50359 : Copyright 15.02.2019 by Bochkanov Sergey
50360 : *************************************************************************/
50361 0 : void knnbuildercreate(knnbuilder* s, ae_state *_state)
50362 : {
50363 :
50364 0 : _knnbuilder_clear(s);
50365 :
50366 :
50367 : /*
50368 : * Empty dataset
50369 : */
50370 0 : s->dstype = -1;
50371 0 : s->npoints = 0;
50372 0 : s->nvars = 0;
50373 0 : s->iscls = ae_false;
50374 0 : s->nout = 1;
50375 :
50376 : /*
50377 : * Default training settings
50378 : */
50379 0 : s->knnnrm = 2;
50380 0 : }
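/*************************************************************************
Usage sketch (illustrative, not part of the library): the mandatory
builder workflow described above. The _knnbuilder_init/_knnmodel_init
calls are assumed to follow the _<type>_init naming convention used
throughout this file; xy, npoints, nvars, nout and _state are assumed to
be prepared by the caller.

    knnbuilder builder;
    knnmodel model;
    knnreport rep;
    _knnbuilder_init(&builder, _state, ae_true);
    _knnmodel_init(&model, _state, ae_true);
    _knnreport_init(&rep, _state, ae_true);
    knnbuildercreate(&builder, _state);          // empty dataset, defaults
    knnbuildersetdatasetreg(&builder, &xy, npoints, nvars, nout, _state);
    knnbuilderbuildknnmodel(&builder, 5, 0.0, &model, &rep, _state); // K=5, exact
*************************************************************************/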
50381 :
50382 :
50383 : /*************************************************************************
50384 : Specifies a regression problem (one or more continuous output variables
50385 : are predicted). There also exists a "classification" version of this
50386 : function, knnbuildersetdatasetcls().
50387 :
50388 : This subroutine adds a dense dataset to the internal storage of the
50389 : builder object. Specifying your dataset in the dense format means that
50390 : the dense version of the KNN construction algorithm will be invoked.
50391 : INPUT PARAMETERS:
50392 : S - KNN builder object
50393 : XY - array[NPoints,NVars+NOut] (note: the actual size can
50394 : be larger; only the leading part is used), dataset:
50395 : * first NVars elements of each row store values of the
50396 : independent variables
50397 : * next NOut elements store values of the dependent
50398 : variables
50399 : NPoints - number of rows in the dataset, NPoints>=1
50400 : NVars - number of independent variables, NVars>=1
50401 : NOut - number of dependent variables, NOut>=1
50402 :
50403 : OUTPUT PARAMETERS:
50404 : S - KNN builder
50405 :
50406 : -- ALGLIB --
50407 : Copyright 15.02.2019 by Bochkanov Sergey
50408 : *************************************************************************/
50409 0 : void knnbuildersetdatasetreg(knnbuilder* s,
50410 : /* Real */ ae_matrix* xy,
50411 : ae_int_t npoints,
50412 : ae_int_t nvars,
50413 : ae_int_t nout,
50414 : ae_state *_state)
50415 : {
50416 : ae_int_t i;
50417 : ae_int_t j;
50418 :
50419 :
50420 :
50421 : /*
50422 : * Check parameters
50423 : */
50424 0 : ae_assert(npoints>=1, "knnbuildersetdatasetreg: npoints<1", _state);
50425 0 : ae_assert(nvars>=1, "knnbuildersetdatasetreg: nvars<1", _state);
50426 0 : ae_assert(nout>=1, "knnbuildersetdatasetreg: nout<1", _state);
50427 0 : ae_assert(xy->rows>=npoints, "knnbuildersetdatasetreg: rows(xy)<npoints", _state);
50428 0 : ae_assert(xy->cols>=nvars+nout, "knnbuildersetdatasetreg: cols(xy)<nvars+nout", _state);
50429 0 : ae_assert(apservisfinitematrix(xy, npoints, nvars+nout, _state), "knnbuildersetdatasetreg: xy parameter contains INFs or NANs", _state);
50430 :
50431 : /*
50432 : * Set dataset
50433 : */
50434 0 : s->dstype = 0;
50435 0 : s->iscls = ae_false;
50436 0 : s->npoints = npoints;
50437 0 : s->nvars = nvars;
50438 0 : s->nout = nout;
50439 0 : rmatrixsetlengthatleast(&s->dsdata, npoints, nvars, _state);
50440 0 : for(i=0; i<=npoints-1; i++)
50441 : {
50442 0 : for(j=0; j<=nvars-1; j++)
50443 : {
50444 0 : s->dsdata.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
50445 : }
50446 : }
50447 0 : rvectorsetlengthatleast(&s->dsrval, npoints*nout, _state);
50448 0 : for(i=0; i<=npoints-1; i++)
50449 : {
50450 0 : for(j=0; j<=nout-1; j++)
50451 : {
50452 0 : s->dsrval.ptr.p_double[i*nout+j] = xy->ptr.pp_double[i][nvars+j];
50453 : }
50454 : }
50455 0 : }
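/*************************************************************************
Layout sketch (illustrative, not part of the library): with NVars=2 and
NOut=1, each row of XY is [x0, x1, y]. A hypothetical 3-point dataset:

    0.0  0.0   1.0     inputs (0,0) -> target  1.0
    1.0  0.0   2.0     inputs (1,0) -> target  2.0
    0.0  1.0  -1.0     inputs (0,1) -> target -1.0

After filling an ae_matrix xy with these values:

    knnbuildersetdatasetreg(&builder, &xy, 3, 2, 1, _state);
*************************************************************************/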
50456 :
50457 :
50458 : /*************************************************************************
50459 : Specifies a classification problem (two or more classes are predicted).
50460 : There also exists a "regression" version, knnbuildersetdatasetreg().
50461 :
50462 : This subroutine adds a dense dataset to the internal storage of the
50463 : builder object. Specifying your dataset in the dense format means that
50464 : the dense version of the KNN construction algorithm will be invoked.
50465 :
50466 : INPUT PARAMETERS:
50467 : S - KNN builder object
50468 : XY - array[NPoints,NVars+1] (note: the actual size can
50469 : be larger; only the leading part is used), dataset:
50470 : * first NVars elements of each row store values of the
50471 : independent variables
50472 : * next element stores class index, in [0,NClasses)
50473 : NPoints - number of rows in the dataset, NPoints>=1
50474 : NVars - number of independent variables, NVars>=1
50475 : NClasses - number of classes, NClasses>=2
50476 :
50477 : OUTPUT PARAMETERS:
50478 : S - KNN builder
50479 :
50480 : -- ALGLIB --
50481 : Copyright 15.02.2019 by Bochkanov Sergey
50482 : *************************************************************************/
50483 0 : void knnbuildersetdatasetcls(knnbuilder* s,
50484 : /* Real */ ae_matrix* xy,
50485 : ae_int_t npoints,
50486 : ae_int_t nvars,
50487 : ae_int_t nclasses,
50488 : ae_state *_state)
50489 : {
50490 : ae_int_t i;
50491 : ae_int_t j;
50492 :
50493 :
50494 :
50495 : /*
50496 : * Check parameters
50497 : */
50498 0 : ae_assert(npoints>=1, "knnbuildersetdatasetcls: npoints<1", _state);
50499 0 : ae_assert(nvars>=1, "knnbuildersetdatasetcls: nvars<1", _state);
50500 0 : ae_assert(nclasses>=2, "knnbuildersetdatasetcls: nclasses<2", _state);
50501 0 : ae_assert(xy->rows>=npoints, "knnbuildersetdatasetcls: rows(xy)<npoints", _state);
50502 0 : ae_assert(xy->cols>=nvars+1, "knnbuildersetdatasetcls: cols(xy)<nvars+1", _state);
50503 0 : ae_assert(apservisfinitematrix(xy, npoints, nvars+1, _state), "knnbuildersetdatasetcls: xy parameter contains INFs or NANs", _state);
50504 0 : for(i=0; i<=npoints-1; i++)
50505 : {
50506 0 : j = ae_round(xy->ptr.pp_double[i][nvars], _state);
50507 0 : ae_assert(j>=0&&j<nclasses, "knnbuildersetdatasetcls: last column of xy contains invalid class number", _state);
50508 : }
50509 :
50510 : /*
50511 : * Set dataset
50512 : */
50513 0 : s->iscls = ae_true;
50514 0 : s->dstype = 0;
50515 0 : s->npoints = npoints;
50516 0 : s->nvars = nvars;
50517 0 : s->nout = nclasses;
50518 0 : rmatrixsetlengthatleast(&s->dsdata, npoints, nvars, _state);
50519 0 : for(i=0; i<=npoints-1; i++)
50520 : {
50521 0 : for(j=0; j<=nvars-1; j++)
50522 : {
50523 0 : s->dsdata.ptr.pp_double[i][j] = xy->ptr.pp_double[i][j];
50524 : }
50525 : }
50526 0 : ivectorsetlengthatleast(&s->dsival, npoints, _state);
50527 0 : for(i=0; i<=npoints-1; i++)
50528 : {
50529 0 : s->dsival.ptr.p_int[i] = ae_round(xy->ptr.pp_double[i][nvars], _state);
50530 : }
50531 0 : }
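/*************************************************************************
Layout sketch (illustrative, not part of the library): with NVars=2 and
NClasses=2, the last column of each row stores the class index as a real
value which is rounded internally:

    0.1  0.9  0.0      point in class #0
    0.9  0.1  1.0      point in class #1

After filling an ae_matrix xy with these values:

    knnbuildersetdatasetcls(&builder, &xy, 2, 2, 2, _state);
*************************************************************************/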
50532 :
50533 :
50534 : /*************************************************************************
50535 : This function sets norm type used for neighbor search.
50536 :
50537 : INPUT PARAMETERS:
50538 : S - KNN builder object
50539 : NormType - norm type:
50540 : * 0 inf-norm
50541 : * 1 1-norm
50542 : * 2 Euclidean norm (default)
50543 :
50544 : OUTPUT PARAMETERS:
50545 : S - KNN builder
50546 :
50547 : -- ALGLIB --
50548 : Copyright 15.02.2019 by Bochkanov Sergey
50549 : *************************************************************************/
50550 0 : void knnbuildersetnorm(knnbuilder* s, ae_int_t nrmtype, ae_state *_state)
50551 : {
50552 :
50553 :
50554 0 : ae_assert((nrmtype==0||nrmtype==1)||nrmtype==2, "knnbuildersetnorm: unexpected norm type", _state);
50555 0 : s->knnnrm = nrmtype;
50556 0 : }
50557 :
50558 :
50559 : /*************************************************************************
50560 : This subroutine builds a KNN model according to the current settings,
50561 : using the dataset internally stored in the builder object.
50562 :
50563 : The model being built performs inference using the Eps-approximate K
50564 : nearest neighbors search algorithm, with:
50565 : * K=1, Eps=0 corresponding to the "nearest neighbor algorithm"
50566 : * K>1, Eps=0 corresponding to the "K nearest neighbors algorithm"
50567 : * K>=1, Eps>0 corresponding to "approximate nearest neighbors algorithm"
50568 :
50569 : Approximate KNN is a good option for high-dimensional datasets (exact
50570 : KNN becomes slow as the number of dimensions grows).
50571 :
50572 : The ALGLIB implementation of kd-trees is used to perform k-NN searches.
50573 :
50574 : ! COMMERCIAL EDITION OF ALGLIB:
50575 : !
50576 : ! The Commercial Edition of ALGLIB includes the following important
50577 : ! improvements of this function:
50578 : ! * high-performance native backend with the same C# interface (C# version)
50579 : ! * multithreading support (C++ and C# versions)
50580 : !
50581 : ! We recommend that you read the 'Working with commercial version' section
50582 : ! of the ALGLIB Reference Manual to find out how to use the performance-
50583 : ! related features provided by the commercial edition of ALGLIB.
50584 :
50585 : INPUT PARAMETERS:
50586 : S - KNN builder object
50587 : K - number of neighbors to search for, K>=1
50588 : Eps - approximation factor:
50589 : * Eps=0 means that exact kNN search is performed
50590 : * Eps>0 means that (1+Eps)-approximate search is performed
50591 :
50592 : OUTPUT PARAMETERS:
50593 : Model - KNN model
50594 : Rep - report
50595 :
50596 : -- ALGLIB --
50597 : Copyright 15.02.2019 by Bochkanov Sergey
50598 : *************************************************************************/
50599 0 : void knnbuilderbuildknnmodel(knnbuilder* s,
50600 : ae_int_t k,
50601 : double eps,
50602 : knnmodel* model,
50603 : knnreport* rep,
50604 : ae_state *_state)
50605 : {
50606 : ae_frame _frame_block;
50607 : ae_int_t i;
50608 : ae_int_t j;
50609 : ae_int_t nvars;
50610 : ae_int_t nout;
50611 : ae_int_t npoints;
50612 : ae_bool iscls;
50613 : ae_matrix xy;
50614 : ae_vector tags;
50615 :
50616 0 : ae_frame_make(_state, &_frame_block);
50617 0 : memset(&xy, 0, sizeof(xy));
50618 0 : memset(&tags, 0, sizeof(tags));
50619 0 : _knnmodel_clear(model);
50620 0 : _knnreport_clear(rep);
50621 0 : ae_matrix_init(&xy, 0, 0, DT_REAL, _state, ae_true);
50622 0 : ae_vector_init(&tags, 0, DT_INT, _state, ae_true);
50623 :
50624 0 : npoints = s->npoints;
50625 0 : nvars = s->nvars;
50626 0 : nout = s->nout;
50627 0 : iscls = s->iscls;
50628 :
50629 : /*
50630 : * Check settings
50631 : */
50632 0 : ae_assert(k>=1, "knnbuilderbuildknnmodel: k<1", _state);
50633 0 : ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "knnbuilderbuildknnmodel: eps<0", _state);
50634 :
50635 : /*
50636 : * Prepare output
50637 : */
50638 0 : knn_clearreport(rep, _state);
50639 0 : model->nvars = nvars;
50640 0 : model->nout = nout;
50641 0 : model->iscls = iscls;
50642 0 : model->k = k;
50643 0 : model->eps = eps;
50644 0 : model->isdummy = ae_false;
50645 :
50646 : /*
50647 : * Quick exit for empty dataset
50648 : */
50649 0 : if( s->dstype==-1 )
50650 : {
50651 0 : model->isdummy = ae_true;
50652 0 : ae_frame_leave(_state);
50653 0 : return;
50654 : }
50655 :
50656 : /*
50657 : * Build kd-tree
50658 : */
50659 0 : if( iscls )
50660 : {
50661 0 : ae_matrix_set_length(&xy, npoints, nvars+1, _state);
50662 0 : ae_vector_set_length(&tags, npoints, _state);
50663 0 : for(i=0; i<=npoints-1; i++)
50664 : {
50665 0 : for(j=0; j<=nvars-1; j++)
50666 : {
50667 0 : xy.ptr.pp_double[i][j] = s->dsdata.ptr.pp_double[i][j];
50668 : }
50669 0 : xy.ptr.pp_double[i][nvars] = (double)(s->dsival.ptr.p_int[i]);
50670 0 : tags.ptr.p_int[i] = s->dsival.ptr.p_int[i];
50671 : }
50672 0 : kdtreebuildtagged(&xy, &tags, npoints, nvars, 0, s->knnnrm, &model->tree, _state);
50673 : }
50674 : else
50675 : {
50676 0 : ae_matrix_set_length(&xy, npoints, nvars+nout, _state);
50677 0 : for(i=0; i<=npoints-1; i++)
50678 : {
50679 0 : for(j=0; j<=nvars-1; j++)
50680 : {
50681 0 : xy.ptr.pp_double[i][j] = s->dsdata.ptr.pp_double[i][j];
50682 : }
50683 0 : for(j=0; j<=nout-1; j++)
50684 : {
50685 0 : xy.ptr.pp_double[i][nvars+j] = s->dsrval.ptr.p_double[i*nout+j];
50686 : }
50687 : }
50688 0 : kdtreebuild(&xy, npoints, nvars, nout, s->knnnrm, &model->tree, _state);
50689 : }
50690 :
50691 : /*
50692 : * Build buffer
50693 : */
50694 0 : knncreatebuffer(model, &model->buffer, _state);
50695 :
50696 : /*
50697 : * Report
50698 : */
50699 0 : knnallerrors(model, &xy, npoints, rep, _state);
50700 0 : ae_frame_leave(_state);
50701 : }
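/*************************************************************************
Usage sketch (illustrative, not part of the library): the K/Eps pairs
listed above, assuming a builder whose dataset has already been
specified. Each call rebuilds the model from scratch.

    knnbuilderbuildknnmodel(&builder, 1, 0.0, &model, &rep, _state); // 1-NN
    knnbuilderbuildknnmodel(&builder, 5, 0.0, &model, &rep, _state); // exact 5-NN
    knnbuilderbuildknnmodel(&builder, 5, 0.5, &model, &rep, _state); // 1.5-approximate 5-NN
*************************************************************************/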
50702 :
50703 :
50704 : /*************************************************************************
50705 : Changing the search settings of a KNN model.
50706 :
50707 : The K and EPS parameters of the KNN (AKNN) search are specified during
50708 : model construction. However, the plain KNN algorithm with Euclidean
50709 : distance allows you to change them at any moment.
50710 :
50711 : NOTE: future versions of KNN model may support advanced versions of KNN,
50712 : such as NCA or LMNN. It is possible that such algorithms won't allow
50713 : you to change search settings on the fly. If you call this function
50714 : for an algorithm which does not support on-the-fly changes, it will
50715 : throw an exception.
50716 :
50717 : INPUT PARAMETERS:
50718 : Model - KNN model
50719 : K - K>=1, neighbors count
50720 : EPS - accuracy of the EPS-approximate NN search. Set to 0.0 if
50721 : you want to perform a "classic" KNN search. Specify
50722 : larger values if you need to speed up high-dimensional
50723 : KNN queries.
50724 :
50725 : OUTPUT PARAMETERS:
50726 : nothing on success, exception on failure
50727 :
50728 : -- ALGLIB --
50729 : Copyright 15.02.2019 by Bochkanov Sergey
50730 : *************************************************************************/
50731 0 : void knnrewritekeps(knnmodel* model,
50732 : ae_int_t k,
50733 : double eps,
50734 : ae_state *_state)
50735 : {
50736 :
50737 :
50738 0 : ae_assert(k>=1, "knnrewritekeps: k<1", _state);
50739 0 : ae_assert(ae_isfinite(eps, _state)&&ae_fp_greater_eq(eps,(double)(0)), "knnrewritekeps: eps<0", _state);
50740 0 : model->k = k;
50741 0 : model->eps = eps;
50742 0 : }
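/*************************************************************************
Usage sketch (illustrative, not part of the library): loosening the
search settings of an already trained model, trading some accuracy for
speed on high-dimensional queries.

    knnrewritekeps(&model, 10, 0.25, _state); // K=10, (1+0.25)-approximate search
*************************************************************************/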
50743 :
50744 :
50745 : /*************************************************************************
50746 : Inference using a KNN model.
50747 :
50748 : See also knnprocess0(), knnprocessi() and knnclassify() for options with
50749 : a somewhat more convenient interface.
50750 :
50751 : IMPORTANT: this function is thread-unsafe and modifies internal structures
50752 : of the model! You cannot use the same model object for parallel
50753 : evaluation from several threads.
50754 :
50755 : Use knntsprocess() with independent thread-local buffers if
50756 : you need thread-safe evaluation.
50757 :
50758 : INPUT PARAMETERS:
50759 : Model - KNN model
50760 : X - input vector, array[0..NVars-1].
50761 : Y - possible preallocated buffer. Reused if long enough.
50762 :
50763 : OUTPUT PARAMETERS:
50764 : Y - result: a regression estimate for regression tasks, a
50765 : vector of posterior probabilities for classification tasks.
50766 :
50767 : -- ALGLIB --
50768 : Copyright 15.02.2019 by Bochkanov Sergey
50769 : *************************************************************************/
50770 0 : void knnprocess(knnmodel* model,
50771 : /* Real */ ae_vector* x,
50772 : /* Real */ ae_vector* y,
50773 : ae_state *_state)
50774 : {
50775 :
50776 :
50777 0 : knntsprocess(model, &model->buffer, x, y, _state);
50778 0 : }
50779 :
50780 :
50781 : /*************************************************************************
50782 : This function returns the first component of the inferred vector (i.e.
50783 : the one with index #0).
50784 :
50785 : It is a convenience wrapper for knnprocess() intended for either:
50786 : * 1-dimensional regression problems
50787 : * 2-class classification problems
50788 :
50789 : In the former case this function returns the inference result as a
50790 : scalar, which is more convenient than wrapping it in a vector. In the
50791 : latter case it returns the probability of the object belonging to class #0.
50792 :
50793 : If you call it for anything other than the two cases above, it will work
50794 : as defined, i.e. return y[0], although it is of less use in such cases.
50795 :
50796 : IMPORTANT: this function is thread-unsafe and modifies internal structures
50797 : of the model! You cannot use the same model object for parallel
50798 : evaluation from several threads.
50799 :
50800 : Use knntsprocess() with independent thread-local buffers if
50801 : you need thread-safe evaluation.
50802 :
50803 : INPUT PARAMETERS:
50804 : Model - KNN model
50805 : X - input vector, array[0..NVars-1].
50806 :
50807 : RESULT:
50808 : Y[0]
50809 :
50810 : -- ALGLIB --
50811 : Copyright 15.02.2019 by Bochkanov Sergey
50812 : *************************************************************************/
50813 0 : double knnprocess0(knnmodel* model,
50814 : /* Real */ ae_vector* x,
50815 : ae_state *_state)
50816 : {
50817 : ae_int_t i;
50818 : ae_int_t nvars;
50819 : double result;
50820 :
50821 :
50822 0 : nvars = model->nvars;
50823 0 : for(i=0; i<=nvars-1; i++)
50824 : {
50825 0 : model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
50826 : }
50827 0 : knn_processinternal(model, &model->buffer, _state);
50828 0 : result = model->buffer.y.ptr.p_double[0];
50829 0 : return result;
50830 : }
50831 :
50832 :
50833 : /*************************************************************************
50834 : This function returns the most probable class number for an input X. It
50835 : is the same as calling knnprocess(model,x,y), then determining
50836 : i=argmax(y[i]) and returning i.
50837 :
50838 : A class number in the [0,NOut) range is returned for classification
50839 : problems; -1 is returned for regression problems.
50840 :
50841 : IMPORTANT: this function is thread-unsafe and modifies internal structures
50842 : of the model! You cannot use the same model object for parallel
50843 : evaluation from several threads.
50844 :
50845 : Use knntsprocess() with independent thread-local buffers if
50846 : you need thread-safe evaluation.
50847 :
50848 : INPUT PARAMETERS:
50849 : Model - KNN model
50850 : X - input vector, array[0..NVars-1].
50851 :
50852 : RESULT:
50853 : class number, -1 for regression tasks
50854 :
50855 : -- ALGLIB --
50856 : Copyright 15.02.2019 by Bochkanov Sergey
50857 : *************************************************************************/
50858 0 : ae_int_t knnclassify(knnmodel* model,
50859 : /* Real */ ae_vector* x,
50860 : ae_state *_state)
50861 : {
50862 : ae_int_t i;
50863 : ae_int_t nvars;
50864 : ae_int_t nout;
50865 : ae_int_t result;
50866 :
50867 :
50868 0 : if( !model->iscls )
50869 : {
50870 0 : result = -1;
50871 0 : return result;
50872 : }
50873 0 : nvars = model->nvars;
50874 0 : nout = model->nout;
50875 0 : for(i=0; i<=nvars-1; i++)
50876 : {
50877 0 : model->buffer.x.ptr.p_double[i] = x->ptr.p_double[i];
50878 : }
50879 0 : knn_processinternal(model, &model->buffer, _state);
50880 0 : result = 0;
50881 0 : for(i=1; i<=nout-1; i++)
50882 : {
50883 0 : if( model->buffer.y.ptr.p_double[i]>model->buffer.y.ptr.p_double[result] )
50884 : {
50885 0 : result = i;
50886 : }
50887 : }
50888 0 : return result;
50889 : }
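/*************************************************************************
Equivalence sketch (illustrative, not part of the library): knnclassify()
is an argmax over the posterior vector produced by knnprocess(). Assuming
a classification model and vectors x, y prepared by the caller:

    ae_int_t i;
    ae_int_t cls = 0;
    knnprocess(&model, &x, &y, _state);
    for(i=1; i<=model.nout-1; i++)
    {
        if( y.ptr.p_double[i]>y.ptr.p_double[cls] )
        {
            cls = i;
        }
    }
    // cls now equals knnclassify(&model, &x, _state)
*************************************************************************/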
50890 :
50891 :
50892 : /*************************************************************************
50893 : 'Interactive' variant of knnprocess() for languages like Python which
50894 : support constructs like "y = knnprocessi(model,x)" and the interactive
50895 : mode of the interpreter.
50896 :
50897 : This function allocates a new array on each call, so it is significantly
50898 : slower than its 'non-interactive' counterpart, but it is more convenient
50899 : when you call it from the command line.
50900 :
50901 : IMPORTANT: this function is thread-unsafe and may modify internal
50902 : structures of the model! You cannot use the same model object
50903 : for parallel evaluation from several threads.
50904 :
50905 : Use knntsprocess() with independent thread-local buffers if
50906 : you need thread-safe evaluation.
50907 :
50908 : -- ALGLIB --
50909 : Copyright 15.02.2019 by Bochkanov Sergey
50910 : *************************************************************************/
50911 0 : void knnprocessi(knnmodel* model,
50912 : /* Real */ ae_vector* x,
50913 : /* Real */ ae_vector* y,
50914 : ae_state *_state)
50915 : {
50916 :
50917 0 : ae_vector_clear(y);
50918 :
50919 0 : knnprocess(model, x, y, _state);
50920 0 : }
50921 :
50922 :
50923 : /*************************************************************************
50924 : Thread-safe processing using an external buffer for temporaries.
50925 :
50926 : This function is thread-safe (i.e. you can use the same KNN model from
50927 : multiple threads) as long as you use different buffer objects for
50928 : different threads.
50929 :
50930 : INPUT PARAMETERS:
50931 : Model - KNN model
50932 : Buf - buffer object, must be allocated specifically for this
50933 : model with knncreatebuffer().
50934 : X - input vector, array[NVars]
50935 :
50936 : OUTPUT PARAMETERS:
50937 : Y - result, array[NOut]: a regression estimate when solving
50938 : a regression task, a vector of posterior probabilities
50939 : for a classification task.
50940 :
50941 : -- ALGLIB --
50942 : Copyright 15.02.2019 by Bochkanov Sergey
50943 : *************************************************************************/
50944 0 : void knntsprocess(knnmodel* model,
50945 : knnbuffer* buf,
50946 : /* Real */ ae_vector* x,
50947 : /* Real */ ae_vector* y,
50948 : ae_state *_state)
50949 : {
50950 : ae_int_t i;
50951 : ae_int_t nvars;
50952 : ae_int_t nout;
50953 :
50954 :
50955 0 : nvars = model->nvars;
50956 0 : nout = model->nout;
50957 0 : for(i=0; i<=nvars-1; i++)
50958 : {
50959 0 : buf->x.ptr.p_double[i] = x->ptr.p_double[i];
50960 : }
50961 0 : knn_processinternal(model, buf, _state);
50962 0 : if( y->cnt<nout )
50963 : {
50964 0 : ae_vector_set_length(y, nout, _state);
50965 : }
50966 0 : for(i=0; i<=nout-1; i++)
50967 : {
50968 0 : y->ptr.p_double[i] = buf->y.ptr.p_double[i];
50969 : }
50970 0 : }
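/*************************************************************************
Usage sketch (illustrative, not part of the library): each worker thread
owns its buffer, so concurrent calls never touch shared model state. The
per-thread setup mirrors the sketch after knncreatebuffer() above.

    // in each worker thread, with a thread-local knnbuffer buf:
    knncreatebuffer(&model, &buf, _state);       // once per thread
    knntsprocess(&model, &buf, &x, &y, _state);  // safe to run concurrently
*************************************************************************/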
50971 :
50972 :
50973 : /*************************************************************************
50974 : Relative classification error on the test set
50975 :
50976 : INPUT PARAMETERS:
50977 : Model - KNN model
50978 : XY - test set
50979 : NPoints - test set size
50980 :
50981 : RESULT:
50982 : percentage of incorrectly classified cases;
50983 : zero if the model solves a regression task.
50984 :
50985 : NOTE: if you need several different kinds of error metrics, it is better
50986 : to use knnallerrors(), which computes all error metrics with just one
50987 : pass over the dataset.
50988 :
50989 : -- ALGLIB --
50990 : Copyright 15.02.2019 by Bochkanov Sergey
50991 : *************************************************************************/
50992 0 : double knnrelclserror(knnmodel* model,
50993 : /* Real */ ae_matrix* xy,
50994 : ae_int_t npoints,
50995 : ae_state *_state)
50996 : {
50997 : ae_frame _frame_block;
50998 : knnreport rep;
50999 : double result;
51000 :
51001 0 : ae_frame_make(_state, &_frame_block);
51002 0 : memset(&rep, 0, sizeof(rep));
51003 0 : _knnreport_init(&rep, _state, ae_true);
51004 :
51005 0 : knnallerrors(model, xy, npoints, &rep, _state);
51006 0 : result = rep.relclserror;
51007 0 : ae_frame_leave(_state);
51008 0 : return result;
51009 : }
51010 :
51011 :
51012 : /*************************************************************************
51013 : Average cross-entropy (in bits per element) on the test set
51014 :
51015 : INPUT PARAMETERS:
51016 : Model - KNN model
51017 : XY - test set
51018 : NPoints - test set size
51019 :
51020 : RESULT:
51021 : CrossEntropy/NPoints.
51022 : Zero if the model solves a regression task.
51023 :
51024 : NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
51025 : models (such models can report exactly zero probabilities), so we
51026 : do not recommend using it.
51027 :
51028 : NOTE: if you need several different kinds of error metrics, it is better
51029 : to use knnallerrors(), which computes all error metrics with just one
51030 : pass over the dataset.
51031 :
51032 : -- ALGLIB --
51033 : Copyright 15.02.2019 by Bochkanov Sergey
51034 : *************************************************************************/
51035 0 : double knnavgce(knnmodel* model,
51036 : /* Real */ ae_matrix* xy,
51037 : ae_int_t npoints,
51038 : ae_state *_state)
51039 : {
51040 : ae_frame _frame_block;
51041 : knnreport rep;
51042 : double result;
51043 :
51044 0 : ae_frame_make(_state, &_frame_block);
51045 0 : memset(&rep, 0, sizeof(rep));
51046 0 : _knnreport_init(&rep, _state, ae_true);
51047 :
51048 0 : knnallerrors(model, xy, npoints, &rep, _state);
51049 0 : result = rep.avgce;
51050 0 : ae_frame_leave(_state);
51051 0 : return result;
51052 : }
51053 :
51054 :
51055 : /*************************************************************************
51056 : RMS error on the test set.
51057 :
51058 : Its meaning for regression tasks is obvious. For classification problems,
51059 : the RMS error measures the error in estimated posterior probabilities.
51060 :
51061 : INPUT PARAMETERS:
51062 : Model - KNN model
51063 : XY - test set
51064 : NPoints - test set size
51065 :
51066 : RESULT:
51067 : root mean square error.
51068 :
51069 : NOTE: if you need several different kinds of error metrics, it is better
51070 : to use knnallerrors(), which computes all error metrics with just one
51071 : pass over the dataset.
51072 :
51073 : -- ALGLIB --
51074 : Copyright 15.02.2019 by Bochkanov Sergey
51075 : *************************************************************************/
51076 0 : double knnrmserror(knnmodel* model,
51077 : /* Real */ ae_matrix* xy,
51078 : ae_int_t npoints,
51079 : ae_state *_state)
51080 : {
51081 : ae_frame _frame_block;
51082 : knnreport rep;
51083 : double result;
51084 :
51085 0 : ae_frame_make(_state, &_frame_block);
51086 0 : memset(&rep, 0, sizeof(rep));
51087 0 : _knnreport_init(&rep, _state, ae_true);
51088 :
51089 0 : knnallerrors(model, xy, npoints, &rep, _state);
51090 0 : result = rep.rmserror;
51091 0 : ae_frame_leave(_state);
51092 0 : return result;
51093 : }
51094 :
51095 :
51096 : /*************************************************************************
51097 : Average error on the test set
51098 :
51099 : Its meaning for regression tasks is obvious. For classification problems,
51100 : the average error measures the error in estimated posterior probabilities.
51101 :
51102 : INPUT PARAMETERS:
51103 : Model - KNN model
51104 : XY - test set
51105 : NPoints - test set size
51106 :
51107 : RESULT:
51108 : average error
51109 :
51110 : NOTE: if you need several different kinds of error metrics, it is better
51111 : to use knnallerrors(), which computes all error metrics with just one
51112 : pass over the dataset.
51113 :
51114 : -- ALGLIB --
51115 : Copyright 15.02.2019 by Bochkanov Sergey
51116 : *************************************************************************/
51117 0 : double knnavgerror(knnmodel* model,
51118 : /* Real */ ae_matrix* xy,
51119 : ae_int_t npoints,
51120 : ae_state *_state)
51121 : {
51122 : ae_frame _frame_block;
51123 : knnreport rep;
51124 : double result;
51125 :
51126 0 : ae_frame_make(_state, &_frame_block);
51127 0 : memset(&rep, 0, sizeof(rep));
51128 0 : _knnreport_init(&rep, _state, ae_true);
51129 :
51130 0 : knnallerrors(model, xy, npoints, &rep, _state);
51131 0 : result = rep.avgerror;
51132 0 : ae_frame_leave(_state);
51133 0 : return result;
51134 : }
51135 :
51136 :
51137 : /*************************************************************************
51138 : Average relative error on the test set
51139 :
51140 : Its meaning for regression tasks is obvious. For classification problems,
51141 : the average relative error measures the error in estimated posterior probabilities.
51142 :
51143 : INPUT PARAMETERS:
51144 : Model - KNN model
51145 : XY - test set
51146 : NPoints - test set size
51147 :
51148 : RESULT:
51149 : average relative error
51150 :
51151 : NOTE: if you need several different kinds of error metrics, it is better
51152 : to use knnallerrors(), which computes all error metrics with just one
51153 : pass over the dataset.
51154 :
51155 : -- ALGLIB --
51156 : Copyright 15.02.2019 by Bochkanov Sergey
51157 : *************************************************************************/
51158 0 : double knnavgrelerror(knnmodel* model,
51159 : /* Real */ ae_matrix* xy,
51160 : ae_int_t npoints,
51161 : ae_state *_state)
51162 : {
51163 : ae_frame _frame_block;
51164 : knnreport rep;
51165 : double result;
51166 :
51167 0 : ae_frame_make(_state, &_frame_block);
51168 0 : memset(&rep, 0, sizeof(rep));
51169 0 : _knnreport_init(&rep, _state, ae_true);
51170 :
51171 0 : knnallerrors(model, xy, npoints, &rep, _state);
51172 0 : result = rep.avgrelerror;
51173 0 : ae_frame_leave(_state);
51174 0 : return result;
51175 : }
51176 :
51177 :
51178 : /*************************************************************************
51179 : Calculates all kinds of errors for the model in one call.
51180 :
51181 : INPUT PARAMETERS:
51182 : Model - KNN model
51183 : XY - test set:
51184 : * one row per point
51185 : * first NVars columns store independent variables
51186 : * depending on problem type:
51187 : * next column stores class number in [0,NClasses) - for
51188 : classification problems
51189 : * next NOut columns store dependent variables - for
51190 : regression problems
51191 : NPoints - test set size, NPoints>=0
51192 :
51193 : OUTPUT PARAMETERS:
51194 : Rep - the following fields are loaded with errors for both
51195 : regression and classification models:
51196 : * rep.rmserror - RMS error for the output
51197 : * rep.avgerror - average error
51198 : * rep.avgrelerror - average relative error
51199 : the following fields are set only for classification
51200 : models and are zero for regression ones:
51201 : * relclserror - relative classification error, in [0,1]
51202 : * avgce - average cross-entropy in bits per dataset entry
51203 :
51204 : NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
51205 : models (such models can report exactly zero probabilities), so we
51206 : do not recommend using it.
51207 :
51208 : -- ALGLIB --
51209 : Copyright 15.02.2019 by Bochkanov Sergey
51210 : *************************************************************************/
51211 0 : void knnallerrors(knnmodel* model,
51212 : /* Real */ ae_matrix* xy,
51213 : ae_int_t npoints,
51214 : knnreport* rep,
51215 : ae_state *_state)
51216 : {
51217 : ae_frame _frame_block;
51218 : knnbuffer buf;
51219 : ae_vector desiredy;
51220 : ae_vector errbuf;
51221 : ae_int_t nvars;
51222 : ae_int_t nout;
51223 : ae_int_t ny;
51224 : ae_bool iscls;
51225 : ae_int_t i;
51226 : ae_int_t j;
51227 :
51228 0 : ae_frame_make(_state, &_frame_block);
51229 0 : memset(&buf, 0, sizeof(buf));
51230 0 : memset(&desiredy, 0, sizeof(desiredy));
51231 0 : memset(&errbuf, 0, sizeof(errbuf));
51232 0 : _knnreport_clear(rep);
51233 0 : _knnbuffer_init(&buf, _state, ae_true);
51234 0 : ae_vector_init(&desiredy, 0, DT_REAL, _state, ae_true);
51235 0 : ae_vector_init(&errbuf, 0, DT_REAL, _state, ae_true);
51236 :
51237 0 : nvars = model->nvars;
51238 0 : nout = model->nout;
51239 0 : iscls = model->iscls;
51240 0 : if( iscls )
51241 : {
51242 0 : ny = 1;
51243 : }
51244 : else
51245 : {
51246 0 : ny = nout;
51247 : }
51248 :
51249 : /*
51250 : * Check input
51251 : */
51252 0 : ae_assert(npoints>=0, "knnallerrors: npoints<0", _state);
51253 0 : ae_assert(xy->rows>=npoints, "knnallerrors: rows(xy)<npoints", _state);
51254 0 : ae_assert(xy->cols>=nvars+ny, "knnallerrors: cols(xy)<nvars+nout", _state);
51255 0 : ae_assert(apservisfinitematrix(xy, npoints, nvars+ny, _state), "knnallerrors: xy parameter contains INFs or NANs", _state);
51256 :
51257 : /*
51258 : * Clean up report
51259 : */
51260 0 : knn_clearreport(rep, _state);
51261 :
51262 : /*
51263 : * Quick exit if needed
51264 : */
51265 0 : if( model->isdummy||npoints==0 )
51266 : {
51267 0 : ae_frame_leave(_state);
51268 0 : return;
51269 : }
51270 :
51271 : /*
51272 : * Process using local buffer
51273 : */
51274 0 : knncreatebuffer(model, &buf, _state);
51275 0 : if( iscls )
51276 : {
51277 0 : dserrallocate(nout, &errbuf, _state);
51278 : }
51279 : else
51280 : {
51281 0 : dserrallocate(-nout, &errbuf, _state);
51282 : }
51283 0 : ae_vector_set_length(&desiredy, ny, _state);
51284 0 : for(i=0; i<=npoints-1; i++)
51285 : {
51286 0 : for(j=0; j<=nvars-1; j++)
51287 : {
51288 0 : buf.x.ptr.p_double[j] = xy->ptr.pp_double[i][j];
51289 : }
51290 0 : if( iscls )
51291 : {
51292 0 : j = ae_round(xy->ptr.pp_double[i][nvars], _state);
51293 0 : ae_assert(j>=0&&j<nout, "knnallerrors: one of the class labels is not in [0,NClasses)", _state);
51294 0 : desiredy.ptr.p_double[0] = (double)(j);
51295 : }
51296 : else
51297 : {
51298 0 : for(j=0; j<=nout-1; j++)
51299 : {
51300 0 : desiredy.ptr.p_double[j] = xy->ptr.pp_double[i][nvars+j];
51301 : }
51302 : }
51303 0 : knn_processinternal(model, &buf, _state);
51304 0 : dserraccumulate(&errbuf, &buf.y, &desiredy, _state);
51305 : }
51306 0 : dserrfinish(&errbuf, _state);
51307 :
51308 : /*
51309 : * Extract results
51310 : */
51311 0 : if( iscls )
51312 : {
51313 0 : rep->relclserror = errbuf.ptr.p_double[0];
51314 0 : rep->avgce = errbuf.ptr.p_double[1];
51315 : }
51316 0 : rep->rmserror = errbuf.ptr.p_double[2];
51317 0 : rep->avgerror = errbuf.ptr.p_double[3];
51318 0 : rep->avgrelerror = errbuf.ptr.p_double[4];
51319 0 : ae_frame_leave(_state);
51320 : }
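/*************************************************************************
Usage sketch (illustrative, not part of the library): computing all error
metrics with a single pass over a test set, instead of calling the five
single-metric wrappers above one by one. xytest and ntest are assumed to
be prepared by the caller.

    knnreport rep;
    _knnreport_init(&rep, _state, ae_true);
    knnallerrors(&model, &xytest, ntest, &rep, _state);
    // rep.rmserror, rep.avgerror, rep.avgrelerror are always set;
    // rep.relclserror and rep.avgce are set for classification models only
*************************************************************************/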
51321 :
51322 :
51323 : /*************************************************************************
51324 : Serializer: allocation
51325 :
51326 : -- ALGLIB --
51327 : Copyright 15.02.2019 by Bochkanov Sergey
51328 : *************************************************************************/
51329 0 : void knnalloc(ae_serializer* s, knnmodel* model, ae_state *_state)
51330 : {
51331 :
51332 :
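    /*
     * The eight entries below reserve space for the eight scalar fields
     * written by knnserialize(): serialization code, format version,
     * NVars, NOut, K, Eps, IsCls and IsDummy.
     */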
51333 0 : ae_serializer_alloc_entry(s);
51334 0 : ae_serializer_alloc_entry(s);
51335 0 : ae_serializer_alloc_entry(s);
51336 0 : ae_serializer_alloc_entry(s);
51337 0 : ae_serializer_alloc_entry(s);
51338 0 : ae_serializer_alloc_entry(s);
51339 0 : ae_serializer_alloc_entry(s);
51340 0 : ae_serializer_alloc_entry(s);
51341 0 : if( !model->isdummy )
51342 : {
51343 0 : kdtreealloc(s, &model->tree, _state);
51344 : }
51345 0 : }
51346 :
51347 :
51348 : /*************************************************************************
51349 : Serializer: serialization
51350 :
51351 : -- ALGLIB --
51352 : Copyright 15.02.2019 by Bochkanov Sergey
51353 : *************************************************************************/
51354 0 : void knnserialize(ae_serializer* s, knnmodel* model, ae_state *_state)
51355 : {
51356 :
51357 :
51358 0 : ae_serializer_serialize_int(s, getknnserializationcode(_state), _state);
51359 0 : ae_serializer_serialize_int(s, knn_knnfirstversion, _state);
51360 0 : ae_serializer_serialize_int(s, model->nvars, _state);
51361 0 : ae_serializer_serialize_int(s, model->nout, _state);
51362 0 : ae_serializer_serialize_int(s, model->k, _state);
51363 0 : ae_serializer_serialize_double(s, model->eps, _state);
51364 0 : ae_serializer_serialize_bool(s, model->iscls, _state);
51365 0 : ae_serializer_serialize_bool(s, model->isdummy, _state);
51366 0 : if( !model->isdummy )
51367 : {
51368 0 : kdtreeserialize(s, &model->tree, _state);
51369 : }
51370 0 : }
51371 :
51372 :
51373 : /*************************************************************************
51374 : Serializer: unserialization
51375 :
51376 : -- ALGLIB --
51377 : Copyright 15.02.2019 by Bochkanov Sergey
51378 : *************************************************************************/
51379 0 : void knnunserialize(ae_serializer* s, knnmodel* model, ae_state *_state)
51380 : {
51381 : ae_int_t i0;
51382 : ae_int_t i1;
51383 :
51384 0 : _knnmodel_clear(model);
51385 :
51386 :
51387 : /*
51388 : * check correctness of header
51389 : */
51390 0 : ae_serializer_unserialize_int(s, &i0, _state);
51391 0 : ae_assert(i0==getknnserializationcode(_state), "KNNUnserialize: stream header corrupted", _state);
51392 0 : ae_serializer_unserialize_int(s, &i1, _state);
51393 0 : ae_assert(i1==knn_knnfirstversion, "KNNUnserialize: stream header corrupted", _state);
51394 :
51395 : /*
51396 : * Unserialize data
51397 : */
51398 0 : ae_serializer_unserialize_int(s, &model->nvars, _state);
51399 0 : ae_serializer_unserialize_int(s, &model->nout, _state);
51400 0 : ae_serializer_unserialize_int(s, &model->k, _state);
51401 0 : ae_serializer_unserialize_double(s, &model->eps, _state);
51402 0 : ae_serializer_unserialize_bool(s, &model->iscls, _state);
51403 0 : ae_serializer_unserialize_bool(s, &model->isdummy, _state);
51404 0 : if( !model->isdummy )
51405 : {
51406 0 : kdtreeunserialize(s, &model->tree, _state);
51407 : }
51408 :
51409 : /*
51410 : * Prepare local buffer
51411 : */
51412 0 : knncreatebuffer(model, &model->buffer, _state);
51413 0 : }
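
/*************************************************************************
Illustrative round-trip sketch, kept under #if 0: serializing a trained
model to a portable string and restoring it. It assumes the std::string
overloads of knnserialize/knnunserialize from the public C++ wrapper in
dataanalysis.h. Note that the unserializer above also rebuilds the local
processing buffer, so the restored model is immediately usable.
*************************************************************************/
#if 0
static void example_knn_serialization_roundtrip(alglib::knnmodel &model)
{
    std::string s;
    alglib::knnserialize(model, s);      /* model -> portable string */
    alglib::knnmodel restored;
    alglib::knnunserialize(s, restored); /* string -> fresh model */
}
#endif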
51414 :
51415 :
51416 : /*************************************************************************
51417 : Sets report fields to their default values
51418 :
51419 : -- ALGLIB --
51420 : Copyright 15.02.2019 by Bochkanov Sergey
51421 : *************************************************************************/
51422 0 : static void knn_clearreport(knnreport* rep, ae_state *_state)
51423 : {
51424 :
51425 :
51426 0 : rep->relclserror = (double)(0);
51427 0 : rep->avgce = (double)(0);
51428 0 : rep->rmserror = (double)(0);
51429 0 : rep->avgerror = (double)(0);
51430 0 : rep->avgrelerror = (double)(0);
51431 0 : }
51432 :
51433 :
51434 : /*************************************************************************
51435 : This function processes buf.X and stores the result in buf.Y
51436 :
51437 : INPUT PARAMETERS
51438 : Model - KNN model
51439 : Buf - processing buffer.
51440 :
51441 :
51442 : IMPORTANT: the buffer object should be used only with the model that was
51443 : used to initialize it. Any attempt to use the buffer with a
51444 : different model is dangerous: you may get an integrity check
51445 : failure (exception) because the sizes of the internal arrays do
51446 : not fit the dimensions of the model structure.
51447 :
51448 : -- ALGLIB --
51449 : Copyright 15.02.2019 by Bochkanov Sergey
51450 : *************************************************************************/
51451 0 : static void knn_processinternal(knnmodel* model,
51452 : knnbuffer* buf,
51453 : ae_state *_state)
51454 : {
51455 : ae_int_t nvars;
51456 : ae_int_t nout;
51457 : ae_bool iscls;
51458 : ae_int_t nncnt;
51459 : ae_int_t i;
51460 : ae_int_t j;
51461 : double v;
51462 :
51463 :
51464 0 : nvars = model->nvars;
51465 0 : nout = model->nout;
51466 0 : iscls = model->iscls;
51467 :
51468 : /*
51469 : * Quick exit if needed
51470 : */
51471 0 : if( model->isdummy )
51472 : {
51473 0 : for(i=0; i<=nout-1; i++)
51474 : {
51475 0 : buf->y.ptr.p_double[i] = (double)(0);
51476 : }
51477 0 : return;
51478 : }
51479 :
51480 : /*
51481 : * Perform request, average results
51482 : */
51483 0 : for(i=0; i<=nout-1; i++)
51484 : {
51485 0 : buf->y.ptr.p_double[i] = (double)(0);
51486 : }
51487 0 : nncnt = kdtreetsqueryaknn(&model->tree, &buf->treebuf, &buf->x, model->k, ae_true, model->eps, _state);
51488 0 : v = 1/coalesce((double)(nncnt), (double)(1), _state);
51489 0 : if( iscls )
51490 : {
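        /*
         * Classification: each of the NNCnt neighbors casts a vote of
         * weight 1/NNCnt for its class label (stored as a kd-tree tag),
         * so on exit Y holds estimated class probabilities.
         */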
51491 0 : kdtreetsqueryresultstags(&model->tree, &buf->treebuf, &buf->tags, _state);
51492 0 : for(i=0; i<=nncnt-1; i++)
51493 : {
51494 0 : j = buf->tags.ptr.p_int[i];
51495 0 : buf->y.ptr.p_double[j] = buf->y.ptr.p_double[j]+v;
51496 : }
51497 : }
51498 : else
51499 : {
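        /*
         * Regression: Y is the average of the neighbors' outputs, stored
         * in columns [NVars,NVars+NOut) of the kd-tree points.
         */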
51500 0 : kdtreetsqueryresultsxy(&model->tree, &buf->treebuf, &buf->xy, _state);
51501 0 : for(i=0; i<=nncnt-1; i++)
51502 : {
51503 0 : for(j=0; j<=nout-1; j++)
51504 : {
51505 0 : buf->y.ptr.p_double[j] = buf->y.ptr.p_double[j]+v*buf->xy.ptr.pp_double[i][nvars+j];
51506 : }
51507 : }
51508 : }
51509 : }
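
/*************************************************************************
Illustrative sketch, kept under #if 0, of the buffer discipline described
in the IMPORTANT note above: one knnbuffer per thread, each created from
the model it will be used with. It assumes the public C++ wrappers
knncreatebuffer and knntsprocess declared in dataanalysis.h.
*************************************************************************/
#if 0
static void example_knn_buffered_request(alglib::knnmodel &model)
{
    alglib::knnbuffer buf;
    alglib::knncreatebuffer(model, buf);    /* buffer is tied to this model */
    alglib::real_1d_array x("[0.5]");
    alglib::real_1d_array y;
    alglib::knntsprocess(model, buf, x, y); /* thread-safe knnprocess() */
}
#endif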
51510 :
51511 :
51512 0 : void _knnbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic)
51513 : {
51514 0 : knnbuffer *p = (knnbuffer*)_p;
51515 0 : ae_touch_ptr((void*)p);
51516 0 : _kdtreerequestbuffer_init(&p->treebuf, _state, make_automatic);
51517 0 : ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
51518 0 : ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
51519 0 : ae_vector_init(&p->tags, 0, DT_INT, _state, make_automatic);
51520 0 : ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
51521 0 : }
51522 :
51523 :
51524 0 : void _knnbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
51525 : {
51526 0 : knnbuffer *dst = (knnbuffer*)_dst;
51527 0 : knnbuffer *src = (knnbuffer*)_src;
51528 0 : _kdtreerequestbuffer_init_copy(&dst->treebuf, &src->treebuf, _state, make_automatic);
51529 0 : ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
51530 0 : ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
51531 0 : ae_vector_init_copy(&dst->tags, &src->tags, _state, make_automatic);
51532 0 : ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
51533 0 : }
51534 :
51535 :
51536 0 : void _knnbuffer_clear(void* _p)
51537 : {
51538 0 : knnbuffer *p = (knnbuffer*)_p;
51539 0 : ae_touch_ptr((void*)p);
51540 0 : _kdtreerequestbuffer_clear(&p->treebuf);
51541 0 : ae_vector_clear(&p->x);
51542 0 : ae_vector_clear(&p->y);
51543 0 : ae_vector_clear(&p->tags);
51544 0 : ae_matrix_clear(&p->xy);
51545 0 : }
51546 :
51547 :
51548 0 : void _knnbuffer_destroy(void* _p)
51549 : {
51550 0 : knnbuffer *p = (knnbuffer*)_p;
51551 0 : ae_touch_ptr((void*)p);
51552 0 : _kdtreerequestbuffer_destroy(&p->treebuf);
51553 0 : ae_vector_destroy(&p->x);
51554 0 : ae_vector_destroy(&p->y);
51555 0 : ae_vector_destroy(&p->tags);
51556 0 : ae_matrix_destroy(&p->xy);
51557 0 : }
51558 :
51559 :
51560 0 : void _knnbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic)
51561 : {
51562 0 : knnbuilder *p = (knnbuilder*)_p;
51563 0 : ae_touch_ptr((void*)p);
51564 0 : ae_matrix_init(&p->dsdata, 0, 0, DT_REAL, _state, make_automatic);
51565 0 : ae_vector_init(&p->dsrval, 0, DT_REAL, _state, make_automatic);
51566 0 : ae_vector_init(&p->dsival, 0, DT_INT, _state, make_automatic);
51567 0 : }
51568 :
51569 :
51570 0 : void _knnbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
51571 : {
51572 0 : knnbuilder *dst = (knnbuilder*)_dst;
51573 0 : knnbuilder *src = (knnbuilder*)_src;
51574 0 : dst->dstype = src->dstype;
51575 0 : dst->npoints = src->npoints;
51576 0 : dst->nvars = src->nvars;
51577 0 : dst->iscls = src->iscls;
51578 0 : dst->nout = src->nout;
51579 0 : ae_matrix_init_copy(&dst->dsdata, &src->dsdata, _state, make_automatic);
51580 0 : ae_vector_init_copy(&dst->dsrval, &src->dsrval, _state, make_automatic);
51581 0 : ae_vector_init_copy(&dst->dsival, &src->dsival, _state, make_automatic);
51582 0 : dst->knnnrm = src->knnnrm;
51583 0 : }
51584 :
51585 :
51586 0 : void _knnbuilder_clear(void* _p)
51587 : {
51588 0 : knnbuilder *p = (knnbuilder*)_p;
51589 0 : ae_touch_ptr((void*)p);
51590 0 : ae_matrix_clear(&p->dsdata);
51591 0 : ae_vector_clear(&p->dsrval);
51592 0 : ae_vector_clear(&p->dsival);
51593 0 : }
51594 :
51595 :
51596 0 : void _knnbuilder_destroy(void* _p)
51597 : {
51598 0 : knnbuilder *p = (knnbuilder*)_p;
51599 0 : ae_touch_ptr((void*)p);
51600 0 : ae_matrix_destroy(&p->dsdata);
51601 0 : ae_vector_destroy(&p->dsrval);
51602 0 : ae_vector_destroy(&p->dsival);
51603 0 : }
51604 :
51605 :
51606 0 : void _knnmodel_init(void* _p, ae_state *_state, ae_bool make_automatic)
51607 : {
51608 0 : knnmodel *p = (knnmodel*)_p;
51609 0 : ae_touch_ptr((void*)p);
51610 0 : _kdtree_init(&p->tree, _state, make_automatic);
51611 0 : _knnbuffer_init(&p->buffer, _state, make_automatic);
51612 0 : }
51613 :
51614 :
51615 0 : void _knnmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
51616 : {
51617 0 : knnmodel *dst = (knnmodel*)_dst;
51618 0 : knnmodel *src = (knnmodel*)_src;
51619 0 : dst->nvars = src->nvars;
51620 0 : dst->nout = src->nout;
51621 0 : dst->k = src->k;
51622 0 : dst->eps = src->eps;
51623 0 : dst->iscls = src->iscls;
51624 0 : dst->isdummy = src->isdummy;
51625 0 : _kdtree_init_copy(&dst->tree, &src->tree, _state, make_automatic);
51626 0 : _knnbuffer_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
51627 0 : }
51628 :
51629 :
51630 0 : void _knnmodel_clear(void* _p)
51631 : {
51632 0 : knnmodel *p = (knnmodel*)_p;
51633 0 : ae_touch_ptr((void*)p);
51634 0 : _kdtree_clear(&p->tree);
51635 0 : _knnbuffer_clear(&p->buffer);
51636 0 : }
51637 :
51638 :
51639 0 : void _knnmodel_destroy(void* _p)
51640 : {
51641 0 : knnmodel *p = (knnmodel*)_p;
51642 0 : ae_touch_ptr((void*)p);
51643 0 : _kdtree_destroy(&p->tree);
51644 0 : _knnbuffer_destroy(&p->buffer);
51645 0 : }
51646 :
51647 :
51648 0 : void _knnreport_init(void* _p, ae_state *_state, ae_bool make_automatic)
51649 : {
51650 0 : knnreport *p = (knnreport*)_p;
51651 0 : ae_touch_ptr((void*)p);
51652 0 : }
51653 :
51654 :
51655 0 : void _knnreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
51656 : {
51657 0 : knnreport *dst = (knnreport*)_dst;
51658 0 : knnreport *src = (knnreport*)_src;
51659 0 : dst->relclserror = src->relclserror;
51660 0 : dst->avgce = src->avgce;
51661 0 : dst->rmserror = src->rmserror;
51662 0 : dst->avgerror = src->avgerror;
51663 0 : dst->avgrelerror = src->avgrelerror;
51664 0 : }
51665 :
51666 :
51667 0 : void _knnreport_clear(void* _p)
51668 : {
51669 0 : knnreport *p = (knnreport*)_p;
51670 0 : ae_touch_ptr((void*)p);
51671 0 : }
51672 :
51673 :
51674 0 : void _knnreport_destroy(void* _p)
51675 : {
51676 0 : knnreport *p = (knnreport*)_p;
51677 0 : ae_touch_ptr((void*)p);
51678 0 : }
51679 :
51680 :
51681 : #endif
51682 : #if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
51683 :
51684 :
51685 : /*************************************************************************
51686 : k-means++ clustering.
51687 : This is a backward-compatibility function; we recommend the CLUSTERING
51688 : subpackage as a better replacement (see the sketch after this function).
51689 :
51690 : -- ALGLIB --
51691 : Copyright 21.03.2009 by Bochkanov Sergey
51692 : *************************************************************************/
51693 0 : void kmeansgenerate(/* Real */ ae_matrix* xy,
51694 : ae_int_t npoints,
51695 : ae_int_t nvars,
51696 : ae_int_t k,
51697 : ae_int_t restarts,
51698 : ae_int_t* info,
51699 : /* Real */ ae_matrix* c,
51700 : /* Integer */ ae_vector* xyc,
51701 : ae_state *_state)
51702 : {
51703 : ae_frame _frame_block;
51704 : ae_matrix dummy;
51705 : ae_int_t itscnt;
51706 : double e;
51707 : kmeansbuffers buf;
51708 :
51709 0 : ae_frame_make(_state, &_frame_block);
51710 0 : memset(&dummy, 0, sizeof(dummy));
51711 0 : memset(&buf, 0, sizeof(buf));
51712 0 : *info = 0;
51713 0 : ae_matrix_clear(c);
51714 0 : ae_vector_clear(xyc);
51715 0 : ae_matrix_init(&dummy, 0, 0, DT_REAL, _state, ae_true);
51716 0 : _kmeansbuffers_init(&buf, _state, ae_true);
51717 :
51718 0 : kmeansinitbuf(&buf, _state);
51719 0 : kmeansgenerateinternal(xy, npoints, nvars, k, 0, 1, 0, restarts, ae_false, info, &itscnt, c, ae_true, &dummy, ae_false, xyc, &e, &buf, _state);
51720 0 : ae_frame_leave(_state);
51721 0 : }
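
/*************************************************************************
Illustrative sketch, kept under #if 0, of the recommended replacement
mentioned above: the CLUSTERING subpackage. It assumes the public C++
wrappers clusterizercreate, clusterizersetpoints and clusterizerrunkmeans
declared in dataanalysis.h.
*************************************************************************/
#if 0
static void example_kmeans_via_clustering()
{
    alglib::clusterizerstate s;
    alglib::kmeansreport rep;
    alglib::real_2d_array xy("[[0,0],[0,1],[5,5],[5,6]]");
    alglib::clusterizercreate(s);
    alglib::clusterizersetpoints(s, xy, 2);  /* 2 = Euclidean distance */
    alglib::clusterizerrunkmeans(s, 2, rep); /* K=2; rep.c holds centers */
}
#endif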
51722 :
51723 :
51724 : #endif
51725 :
51726 : }
51727 :