included kernel ridge regression module
This commit is contained in:
parent
7172e26a50
commit
c0ce70fbc0
2
.gitignore
vendored
2
.gitignore
vendored
@ -54,6 +54,8 @@ t
|
|||||||
test
|
test
|
||||||
fi
|
fi
|
||||||
tX
|
tX
|
||||||
|
test_reg
|
||||||
|
test_regsurf
|
||||||
*.tar.bz2
|
*.tar.bz2
|
||||||
.*.swp
|
.*.swp
|
||||||
# CVS default ignores end
|
# CVS default ignores end
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
lib_LTLIBRARIES = libla.la
|
lib_LTLIBRARIES = libla.la
|
||||||
include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h
|
include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h reg.h regsurf.h
|
||||||
libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc
|
libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc reg.cc regsurf.cc
|
||||||
nodist_libla_la_SOURCES = version.cc
|
nodist_libla_la_SOURCES = version.cc
|
||||||
check_PROGRAMS = t test tX
|
check_PROGRAMS = t test tX test_reg test_regsurf
|
||||||
t_SOURCES = t.cc t2.cc
|
t_SOURCES = t.cc t2.cc
|
||||||
#nodist_t_SOURCES =
|
#nodist_t_SOURCES =
|
||||||
test_SOURCES = test.cc
|
test_SOURCES = test.cc
|
||||||
tX_SOURCES = tX.cc
|
tX_SOURCES = tX.cc
|
||||||
|
test_reg_SOURCES = test_reg.cc
|
||||||
|
test_regsurf_SOURCES = test_regsurf.cc
|
||||||
LDADD = .libs/libla.a
|
LDADD = .libs/libla.a
|
||||||
ACLOCAL_AMFLAGS = -I m4
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
EXTRA_DIST = get_git_version LICENSE doxygen.cfg aminclude.am acinclude.m4 footer.html
|
EXTRA_DIST = get_git_version LICENSE doxygen.cfg aminclude.am acinclude.m4 footer.html
|
||||||
|
851
reg.cc
Normal file
851
reg.cc
Normal file
@ -0,0 +1,851 @@
|
|||||||
|
/*
|
||||||
|
LA: linear algebra C++ interface library
|
||||||
|
|
||||||
|
Kernel ridge regression module Copyright (C) 2024
|
||||||
|
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "reg.h"
|
||||||
|
|
||||||
|
namespace LA {
|
||||||
|
template <typename T>
|
||||||
|
T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
T result = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < dimensions; i++)
|
||||||
|
result = result + (x[i] * x[i]);
|
||||||
|
|
||||||
|
result = std::exp(-result/(2 * sigma * sigma));
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
T result = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < dimensions; i++)
|
||||||
|
result = result + abs(x[i]);
|
||||||
|
|
||||||
|
result = std::exp(-abs(result)/(sigma));
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
T result = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < dimensions; i++)
|
||||||
|
result = result + (x[i] * x[i]);
|
||||||
|
|
||||||
|
result = std::exp(-std::pow(result, 0.5)/sigma);
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n,
|
||||||
|
const T* Matern_comb_number)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
unsigned int k;
|
||||||
|
T part_1 = 0; // sum(x^2)^1/2
|
||||||
|
T exponential_part;
|
||||||
|
T Matern_part = 0;
|
||||||
|
T result;
|
||||||
|
|
||||||
|
for (i = 0; i < dimensions; i++)
|
||||||
|
part_1 = part_1 + (x[i] * x[i]);
|
||||||
|
|
||||||
|
part_1 = std::pow(part_1, 0.5);
|
||||||
|
exponential_part = std::exp(-part_1/(sigma));
|
||||||
|
|
||||||
|
for (k = 0; k < n; k++)
|
||||||
|
Matern_part += Matern_comb_number[i] * std::pow(2 * part_1/sigma, n - k);
|
||||||
|
|
||||||
|
result = exponential_part * Matern_part;
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int Matern_coefficient(const unsigned int n, T* comb_number) // values of (n + k)!/(2 n)! * n!/(k!(n-k)!)
|
||||||
|
{
|
||||||
|
unsigned int i , j;
|
||||||
|
unsigned int k;
|
||||||
|
unsigned int part_1; // reverse value of (n + k)!/(2 n)!
|
||||||
|
unsigned int part_2; // n!/(n-k)!
|
||||||
|
unsigned int part_3; // k!
|
||||||
|
|
||||||
|
for (k = 0; k < n; k++)
|
||||||
|
{
|
||||||
|
part_1 = 1;
|
||||||
|
part_2 = 1;
|
||||||
|
part_3 = 1;
|
||||||
|
|
||||||
|
for (i = n + k + 1; i <= 2 * n; i++)
|
||||||
|
part_1 = part_1 * i;
|
||||||
|
|
||||||
|
for (i = n - k + 1; i <= n; i++)
|
||||||
|
part_2 = part_2 * i;
|
||||||
|
|
||||||
|
for (i = 1; i < k; i++)
|
||||||
|
part_3 = part_3 * i;
|
||||||
|
|
||||||
|
comb_number[k] = T(part_2)/T(part_1 * part_3);
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
REG<T>::REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel)
|
||||||
|
{
|
||||||
|
Init(x, y , params, data_size, x_dimensions, kernel);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
REG<T>::REG(const string x_file, const string y_file, const string param_file)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
ifstream xfile(x_file);
|
||||||
|
ifstream yfile(y_file);
|
||||||
|
ifstream paramfile(param_file);
|
||||||
|
string line;
|
||||||
|
T line_value;
|
||||||
|
// parameters of model
|
||||||
|
unsigned int data_size;
|
||||||
|
unsigned int x_dimensions;
|
||||||
|
string kernel;
|
||||||
|
|
||||||
|
REG_param_file = new T[4];
|
||||||
|
if(!REG_param_file) laerror("allocation failure in REG constructor");
|
||||||
|
// loading parameters for regression model from param_file
|
||||||
|
if (paramfile.is_open())
|
||||||
|
{
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
data_size = stoi(line);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
x_dimensions = stoi(line);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
kernel = line;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
REG_param_file[0] = stod(line);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
REG_param_file[1] = stod(line);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
REG_param_file[2] = stod(line);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (kernel == "Matern")
|
||||||
|
{
|
||||||
|
if (getline(paramfile, line))
|
||||||
|
REG_param_file[3] = static_cast<unsigned int>(stod(line) + 0.5);
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
REG_x_file = new T[data_size * x_dimensions];
|
||||||
|
REG_y_file = new T[data_size];
|
||||||
|
if(!REG_x_file || !REG_y_file) laerror("allocation failure 2 in REG constructor");
|
||||||
|
|
||||||
|
// loading x and y data for regression model from files
|
||||||
|
if (xfile.is_open() and yfile.is_open())
|
||||||
|
{
|
||||||
|
for (i = 0; i < data_size * x_dimensions; i++)
|
||||||
|
{
|
||||||
|
if (getline(xfile, line))
|
||||||
|
{
|
||||||
|
line_value = stod(line);
|
||||||
|
REG_x_file[i] = line_value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (i = 0; i < data_size; i++)
|
||||||
|
{
|
||||||
|
if (getline(yfile, line))
|
||||||
|
{
|
||||||
|
line_value = stod(line);
|
||||||
|
REG_y_file[i] = line_value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
xfile.close();
|
||||||
|
yfile.close();
|
||||||
|
paramfile.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
Init(REG_x_file, REG_y_file, REG_param_file, data_size, x_dimensions, kernel);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void REG<T>::Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions,
|
||||||
|
const string kernel)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
// Setting type of kernel
|
||||||
|
REG_kernel_type = kernel;
|
||||||
|
if (kernel != "Gaussian" and kernel != "Laplacian" and kernel != "Exponential" and kernel != "Matern")
|
||||||
|
return;
|
||||||
|
if (kernel == "Gaussian")
|
||||||
|
REG_kernel_id = 0;
|
||||||
|
if (kernel == "Laplacian")
|
||||||
|
REG_kernel_id = 1;
|
||||||
|
if (kernel == "Exponential")
|
||||||
|
REG_kernel_id = 2;
|
||||||
|
if (kernel == "Matern")
|
||||||
|
REG_kernel_id = 3;
|
||||||
|
// copy parameters and data
|
||||||
|
REG_lambda= params[0];
|
||||||
|
REG_alpha = params[1];
|
||||||
|
if (REG_alpha > 1)
|
||||||
|
REG_alpha = 1;
|
||||||
|
if (REG_alpha < 0)
|
||||||
|
REG_alpha = 0;
|
||||||
|
REG_sigma = params[2];
|
||||||
|
if (kernel == "Matern")
|
||||||
|
REG_n = static_cast<unsigned int>(params[3] + 0.5);
|
||||||
|
|
||||||
|
REG_count_kernels = data_size;
|
||||||
|
REG_count_dimensions = x_dimensions;
|
||||||
|
REG_aloc_init = false;
|
||||||
|
{
|
||||||
|
REG_params = new T[4];
|
||||||
|
if(!REG_params) laerror("allocation failure in REG::Init");
|
||||||
|
if (REG_kernel_id == 3)
|
||||||
|
{
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
REG_params[i] = params[i];
|
||||||
|
REG_Matern_comb_number_vec = NRVec<T>(REG_n);
|
||||||
|
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
REG_params[i] = params[i];
|
||||||
|
|
||||||
|
REG_Matern_comb_number_vec = NRVec<T>(1);
|
||||||
|
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy data
|
||||||
|
REG_coordinates_vec = NRVec<T>(x, data_size * REG_count_dimensions);
|
||||||
|
REG_coordinates = REG_coordinates_vec.operator T*();
|
||||||
|
REG_y_vec = NRVec<T>(y , data_size);
|
||||||
|
REG_Y = REG_y_vec.operator T*();
|
||||||
|
// for fitting model
|
||||||
|
REG_distances_mat = NRMat<T>(data_size, data_size);
|
||||||
|
REG_distances = REG_distances_mat.operator T*();
|
||||||
|
REG_kernels_mat = NRMat<T>(data_size, data_size);
|
||||||
|
REG_kernels = REG_kernels_mat.operator T*();
|
||||||
|
REG_kernels_sums_vec = NRVec<T>(data_size);
|
||||||
|
REG_kernels_sums = REG_kernels_sums_vec.operator T*();
|
||||||
|
REG_weights_vec = NRVec<T>(data_size);
|
||||||
|
REG_weights = REG_weights_vec.operator T*();
|
||||||
|
// for predictions
|
||||||
|
REG_coordinates_diff_vec = NRVec<T>(data_size);
|
||||||
|
REG_coordinates_diff = REG_coordinates_diff_vec.operator T*();
|
||||||
|
REG_kernels_diff_vec = NRVec<T>(data_size);
|
||||||
|
REG_kernels_diff = REG_kernels_diff_vec.operator T*();
|
||||||
|
REG_y_preds_vec = NRVec<T>(data_size);
|
||||||
|
REG_y_preds = REG_y_preds_vec.operator T*();
|
||||||
|
REG_y_delta_vec = NRVec<T>(data_size);
|
||||||
|
REG_y_delta = REG_y_delta_vec.operator T*();
|
||||||
|
REG_y_temp_vec = NRVec<T>(data_size);
|
||||||
|
REG_y_temp = REG_y_temp_vec.operator T*();
|
||||||
|
|
||||||
|
REG_aloc_init = true;
|
||||||
|
|
||||||
|
// calculating Matern coefficients, if Matern kernel is selected
|
||||||
|
if (REG_kernel_id == 3)
|
||||||
|
Matern_coefficient(REG_n, REG_Matern_comb_number);
|
||||||
|
|
||||||
|
REG_base_init = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T REG<T>::Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id)
|
||||||
|
{
|
||||||
|
T result;
|
||||||
|
|
||||||
|
switch (kernel_id) {
|
||||||
|
case 0:
|
||||||
|
result = Gaussian_kernel(x, dimensions, REG_sigma);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
result = Laplacian_kernel(x, dimensions, REG_sigma);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
result = Laplacian_kernel(x, dimensions, REG_sigma);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
result = Matern_kernel(x, dimensions, REG_sigma, REG_n, REG_Matern_comb_number);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::Build_REG_distances_mat()
|
||||||
|
{
|
||||||
|
unsigned int i, j, k;
|
||||||
|
unsigned int mat_dimension, vec_dimension;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
mat_dimension = REG_count_kernels;
|
||||||
|
vec_dimension = REG_count_dimensions;
|
||||||
|
// building the matrix of vectors coordinates differences between kernels
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
{
|
||||||
|
for (j = i + 1; j < mat_dimension; j++)
|
||||||
|
for (k = 0; k < vec_dimension; k++)
|
||||||
|
{
|
||||||
|
REG_distances[(i * (mat_dimension) + j) * vec_dimension + k] = REG_coordinates[i * vec_dimension + k] -
|
||||||
|
REG_coordinates[j * vec_dimension + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// copy calculated values under diagonal
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
{
|
||||||
|
for (j = i + 1; j < mat_dimension; j++)
|
||||||
|
for (k = 0; k < vec_dimension; k++)
|
||||||
|
{
|
||||||
|
REG_distances[(j * (mat_dimension) + i) * vec_dimension + k] =
|
||||||
|
REG_distances[(i * (mat_dimension) + j) * vec_dimension + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// copy 0 to diagonal
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
for (k = 0; k < vec_dimension; k++)
|
||||||
|
REG_distances[(i * (mat_dimension + 1) * vec_dimension) + k] = 0;
|
||||||
|
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::Build_REG_kernels_mat()
|
||||||
|
{
|
||||||
|
unsigned int i, j, k;
|
||||||
|
unsigned int mat_dimension, vec_dimension;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// copy class parameters
|
||||||
|
mat_dimension = REG_count_kernels;
|
||||||
|
vec_dimension = REG_count_dimensions;
|
||||||
|
// crete kernel coefficients matrix for gaussian kernels
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
for (j = i + 1; j < mat_dimension; j++)
|
||||||
|
{
|
||||||
|
REG_kernels[i * mat_dimension + j] = Kernel(®_distances[(i * (mat_dimension) + j) * vec_dimension],
|
||||||
|
vec_dimension, REG_kernel_id);
|
||||||
|
}
|
||||||
|
// copy calculated values under diagonal
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
for (j = i + 1; j < mat_dimension; j++)
|
||||||
|
REG_kernels[(j * (mat_dimension) + i)] = REG_kernels[(i * (mat_dimension) + j)];
|
||||||
|
|
||||||
|
// copy 1 to diagonal
|
||||||
|
for (i = 0; i < mat_dimension; i++)
|
||||||
|
REG_kernels[i * (mat_dimension + 1)] = 1;
|
||||||
|
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::Build_REG_kernels_mat_sums()
|
||||||
|
{
|
||||||
|
unsigned int i, j;
|
||||||
|
unsigned int count_kernels;
|
||||||
|
T sum;
|
||||||
|
|
||||||
|
count_kernels = REG_count_kernels;
|
||||||
|
for (i = 0; i < count_kernels; i++)
|
||||||
|
{
|
||||||
|
sum = 0;
|
||||||
|
for (j = 0; j < count_kernels; j++)
|
||||||
|
sum += REG_kernels[(i * count_kernels) + j];
|
||||||
|
|
||||||
|
REG_kernels_sums[i] = sum;
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T REG<T>::REG_RMSE()
|
||||||
|
{
|
||||||
|
T delta_square;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
T y_max = REG_y_vec.amax();
|
||||||
|
T y_min = REG_y_vec.amin();
|
||||||
|
|
||||||
|
REG_y_delta_vec |= REG_y_preds_vec;
|
||||||
|
REG_y_delta_vec -= REG_y_vec;
|
||||||
|
REG_y_delta_vec *= REG_y_delta_vec;
|
||||||
|
delta_square = REG_y_delta_vec.sum();
|
||||||
|
delta_square = delta_square/REG_count_kernels;
|
||||||
|
delta_square = std::sqrt(delta_square);
|
||||||
|
return(delta_square);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T REG<T>::Loss_Function()
|
||||||
|
{
|
||||||
|
unsigned int count_x;
|
||||||
|
T RMSE;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// calculating the root mean square error - RMSE
|
||||||
|
RMSE = REG_RMSE();
|
||||||
|
return (RMSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Fit(const T max_Loss, const unsigned int max_learning_cycles)
|
||||||
|
{ // the kernel ridge learning algorithm
|
||||||
|
unsigned int i, j;
|
||||||
|
T Loss;
|
||||||
|
T Lasso_part;
|
||||||
|
T kernel_ridge_part;
|
||||||
|
unsigned int count_kernels;
|
||||||
|
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// checking the input parameters
|
||||||
|
if (max_learning_cycles == 0)
|
||||||
|
{
|
||||||
|
laerror("zero learning cycles");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
if (max_Loss < 0)
|
||||||
|
{
|
||||||
|
laerror("negative value of maximum Loss function");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
Lasso_part = REG_lambda * REG_alpha;
|
||||||
|
kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5;
|
||||||
|
count_kernels = REG_count_kernels;
|
||||||
|
|
||||||
|
// initialization of REG_distances_mat and REG_kernels_mat if necessary
|
||||||
|
if (REG_mat_init == false)
|
||||||
|
{
|
||||||
|
Build_REG_distances_mat();
|
||||||
|
Build_REG_kernels_mat();
|
||||||
|
Build_REG_kernels_mat_sums();
|
||||||
|
REG_mat_init = true;
|
||||||
|
}
|
||||||
|
// initialitation of weight coefficients vector
|
||||||
|
REG_weights_vec |= REG_y_vec;
|
||||||
|
// Linear learning cycle of weight coefficient for learning
|
||||||
|
for (i = 0; i < max_learning_cycles; i++)
|
||||||
|
{
|
||||||
|
// calculating preds_vec vector of predicted y values
|
||||||
|
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
|
||||||
|
if (kernel_ridge_part != 0)
|
||||||
|
for (j = 0; j < REG_count_kernels; j++)
|
||||||
|
REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j];
|
||||||
|
|
||||||
|
if (Lasso_part != 0)
|
||||||
|
REG_y_preds_vec += Lasso_part;
|
||||||
|
|
||||||
|
// calculating new weights
|
||||||
|
REG_y_temp_vec |= REG_y_vec;
|
||||||
|
REG_y_temp_vec /= REG_y_preds_vec;
|
||||||
|
REG_weights_vec *= REG_y_temp_vec;
|
||||||
|
// checking the Loss_Function value
|
||||||
|
if (max_Loss > 0)
|
||||||
|
{
|
||||||
|
Loss = Loss_Function();
|
||||||
|
if (Loss < max_Loss)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate)
|
||||||
|
{
|
||||||
|
unsigned int i, j, k;
|
||||||
|
unsigned int count_x;
|
||||||
|
T Loss;
|
||||||
|
T delta_RMSE;
|
||||||
|
// for optimizing calculation of Kernel ridge part of Loss function difference
|
||||||
|
T kernel_ridge_part;
|
||||||
|
T delta_KRR_part;
|
||||||
|
// for optimizing calculation of Lasso part of Loss function difference
|
||||||
|
T Lasso_part;
|
||||||
|
T delta_Lasso_part;
|
||||||
|
T sqrt_count_x;
|
||||||
|
|
||||||
|
count_x = REG_count_kernels;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// Checking the input parameters
|
||||||
|
if (max_learning_cycles == 0)
|
||||||
|
{
|
||||||
|
laerror("zero learning cycles");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
if (max_Loss < 0)
|
||||||
|
{
|
||||||
|
laerror("negative value of maximum Loss function");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// initialization of REG_distances_mat and REG_kernels_mat if necessary
|
||||||
|
if (REG_mat_init == false)
|
||||||
|
{
|
||||||
|
Build_REG_distances_mat();
|
||||||
|
Build_REG_kernels_mat();
|
||||||
|
Build_REG_kernels_mat_sums();
|
||||||
|
REG_mat_init = true;
|
||||||
|
}
|
||||||
|
// initialitation of weight coefficients vector
|
||||||
|
REG_weights_vec |= REG_y_vec;
|
||||||
|
kernel_ridge_part = REG_lambda * (1.00 - REG_alpha);
|
||||||
|
Lasso_part = REG_lambda * REG_alpha;
|
||||||
|
sqrt_count_x = std::sqrt(count_x);
|
||||||
|
// gradient descent Learning cycle of weight coefficient for learning
|
||||||
|
for (i = 0; i < max_learning_cycles; i++)
|
||||||
|
{
|
||||||
|
// calculating preds_vec vector of predicted y values
|
||||||
|
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
|
||||||
|
if (kernel_ridge_part != 0)
|
||||||
|
for (j = 0; j < count_x; j++)
|
||||||
|
REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j];
|
||||||
|
|
||||||
|
if (Lasso_part != 0)
|
||||||
|
REG_y_preds_vec += Lasso_part;
|
||||||
|
// calculating difference of root mean square error - RMSE
|
||||||
|
for (j = 0; j < count_x; j++)
|
||||||
|
{
|
||||||
|
delta_RMSE = 0;
|
||||||
|
for (k = 0; k < count_x; k++)
|
||||||
|
delta_RMSE += (REG_y_preds[k] - REG_Y[k]) * REG_kernels[j * count_x + k];
|
||||||
|
|
||||||
|
delta_RMSE /=sqrt_count_x;
|
||||||
|
// calculating new weights
|
||||||
|
REG_weights[j] /= (1 - learning_rate * delta_RMSE);
|
||||||
|
}
|
||||||
|
// convergence criteria check
|
||||||
|
if (max_Loss > 0)
|
||||||
|
{
|
||||||
|
Loss = Loss_Function();
|
||||||
|
if (Loss < max_Loss)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Save_fitted(const string model_file)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
ofstream modelfile(model_file);
|
||||||
|
string line;
|
||||||
|
|
||||||
|
// checking the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// save model to file
|
||||||
|
if (modelfile.is_open())
|
||||||
|
for (i = 0; i < REG_count_kernels; i++)
|
||||||
|
{
|
||||||
|
line = to_string(REG_weights[i]);
|
||||||
|
modelfile << line << '\n';
|
||||||
|
}
|
||||||
|
modelfile.close();
|
||||||
|
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Load_fitted(const string model_file)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
ifstream modelfile(model_file);
|
||||||
|
string line;
|
||||||
|
T line_value;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
// initialization of REG_distances_mat and REG_kernels_mat if necessary
|
||||||
|
if (REG_mat_init == false)
|
||||||
|
{
|
||||||
|
Build_REG_distances_mat();
|
||||||
|
Build_REG_kernels_mat();
|
||||||
|
REG_mat_init = true;
|
||||||
|
}
|
||||||
|
// load stored weights
|
||||||
|
if (modelfile.is_open())
|
||||||
|
{
|
||||||
|
for (i = 0; i < REG_count_kernels; i++)
|
||||||
|
if (getline(modelfile, line))
|
||||||
|
{
|
||||||
|
line_value = stod(line);
|
||||||
|
REG_weights[i] = line_value;
|
||||||
|
}
|
||||||
|
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size)
|
||||||
|
{
|
||||||
|
unsigned int i, j, k;
|
||||||
|
T pred_value;
|
||||||
|
T Lasso_part;
|
||||||
|
T kernel_ridge_part;
|
||||||
|
T sum_kernels_diff;
|
||||||
|
T weights_sum;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
Lasso_part = REG_lambda * REG_alpha;
|
||||||
|
kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5;
|
||||||
|
// loop for predicting new values for each x
|
||||||
|
for (i = 0; i < data_size; i++) // For every x value
|
||||||
|
{
|
||||||
|
pred_value = 0;
|
||||||
|
// calculating the REG_coordinates_vec
|
||||||
|
for (j = 0; j < REG_count_kernels; j++) // For every kernel
|
||||||
|
{
|
||||||
|
for (k = 0; k < REG_count_dimensions; k++) // For every x coordinate
|
||||||
|
{
|
||||||
|
REG_coordinates_diff[(j * REG_count_dimensions) + k] = x[(i * REG_count_dimensions) + k] -
|
||||||
|
REG_coordinates[(j * REG_count_dimensions) + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// calculating the REG_kernels_vec
|
||||||
|
if (kernel_ridge_part != 0)
|
||||||
|
{
|
||||||
|
sum_kernels_diff = 0;
|
||||||
|
for (j = 0; j < REG_count_kernels; j++) // For every kernel
|
||||||
|
{
|
||||||
|
REG_kernels_diff[j] = Kernel(®_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id);
|
||||||
|
pred_value += REG_kernels_diff[j] * REG_weights[j];
|
||||||
|
sum_kernels_diff += REG_kernels_diff[j];
|
||||||
|
}
|
||||||
|
if (sum_kernels_diff != 0)
|
||||||
|
pred_value *= (sum_kernels_diff + kernel_ridge_part)/sum_kernels_diff;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
for (j = 0; j < REG_count_kernels; j++) // For every kernel
|
||||||
|
{
|
||||||
|
REG_kernels_diff[j] = Kernel(®_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id);
|
||||||
|
pred_value += REG_kernels_diff[j] * REG_weights[j];
|
||||||
|
}
|
||||||
|
pred_value += Lasso_part;
|
||||||
|
// storing y prediction into memory
|
||||||
|
y_preds[i] = pred_value;
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Get_Score(const T* x, T* y_preds, const unsigned int data_size, const T* y, T* score)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
T prediction;
|
||||||
|
// check the initialization
|
||||||
|
if (REG_base_init == false)
|
||||||
|
{
|
||||||
|
laerror("regression model not inicialized");
|
||||||
|
return(1);
|
||||||
|
}
|
||||||
|
REG_Get_predictions(x, y_preds, data_size);
|
||||||
|
prediction = 0;
|
||||||
|
for (i = 0; i < data_size; i++)
|
||||||
|
prediction += (y[i] - y_preds[i]) * (y[i] - y_preds[i]);
|
||||||
|
|
||||||
|
score[0] = prediction;
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Set_params(const T* params, const string kernel)
|
||||||
|
{
|
||||||
|
// setting type of kernel
|
||||||
|
REG_kernel_type = kernel;
|
||||||
|
if (kernel == "Gaussian")
|
||||||
|
REG_kernel_id = 0;
|
||||||
|
if (kernel == "Laplacian")
|
||||||
|
REG_kernel_id = 1;
|
||||||
|
if (kernel == "Exponential")
|
||||||
|
REG_kernel_id = 2;
|
||||||
|
if (kernel == "Matern")
|
||||||
|
REG_kernel_id = 3;
|
||||||
|
// copy parameters and data
|
||||||
|
REG_lambda = params[0];
|
||||||
|
REG_alpha = params[1];
|
||||||
|
if (REG_alpha > 1)
|
||||||
|
REG_alpha = 1;
|
||||||
|
if (REG_alpha < 0)
|
||||||
|
REG_alpha = 0;
|
||||||
|
REG_sigma = params[2];
|
||||||
|
if (kernel == "Matern")
|
||||||
|
REG_n = static_cast<unsigned int>(params[3] + 0.5);
|
||||||
|
|
||||||
|
// calculating Matern coefficients, if Matern kernel is selected
|
||||||
|
if (REG_kernel_id == 3)
|
||||||
|
{
|
||||||
|
REG_Matern_comb_number_vec.resize(REG_n);
|
||||||
|
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
|
||||||
|
Matern_coefficient(REG_n, REG_Matern_comb_number);
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Get_params(T* params, string* kernel)
|
||||||
|
{
|
||||||
|
kernel[0] = REG_kernel_type;
|
||||||
|
params[0] = REG_lambda;
|
||||||
|
params[1] = REG_alpha;
|
||||||
|
params[2] = REG_sigma;
|
||||||
|
|
||||||
|
if (REG_kernel_type == "Mattern")
|
||||||
|
params[3] = REG_n;
|
||||||
|
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int REG<T>::REG_Get_weight_coeff(T* weight)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
for (i = 0; i < REG_count_kernels; i++)
|
||||||
|
weight[i] = REG_weights[i];
|
||||||
|
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
REG<T>::~REG()
|
||||||
|
{
|
||||||
|
// dealocating memory to avoid memory leaks
|
||||||
|
if (REG_params != nullptr)
|
||||||
|
delete[] REG_params;
|
||||||
|
if (REG_x_file != nullptr)
|
||||||
|
delete[] REG_x_file;
|
||||||
|
if (REG_y_file != nullptr)
|
||||||
|
delete[] REG_y_file;
|
||||||
|
if (REG_param_file != nullptr)
|
||||||
|
delete[] REG_param_file;
|
||||||
|
if (REG_fit_file != nullptr)
|
||||||
|
delete[] REG_fit_file;
|
||||||
|
}
|
||||||
|
|
||||||
|
//enforce instantiation
|
||||||
|
|
||||||
|
#define INSTANTIZE(T) \
|
||||||
|
template T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma); \
|
||||||
|
template T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma); \
|
||||||
|
template T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma); \
|
||||||
|
template T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number); \
|
||||||
|
template unsigned int Matern_coefficient(const unsigned int n, T* comb_number); \
|
||||||
|
|
||||||
|
|
||||||
|
INSTANTIZE(double)
|
||||||
|
|
||||||
|
} // end of namespace
|
||||||
|
template class REG<double>;
|
192
reg.h
Normal file
192
reg.h
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
/*
|
||||||
|
LA: linear algebra C++ interface library
|
||||||
|
Kernel ridge regression module Copyright (C) 2024
|
||||||
|
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <bits/stdc++.h>
|
||||||
|
#include <fstream>
|
||||||
|
using namespace std;
|
||||||
|
# include "mat.h" // vector support libla implementation
|
||||||
|
# include "vec.h" // compiler parameters: -llapack -lblas
|
||||||
|
|
||||||
|
# include "la.h"
|
||||||
|
# ifndef REG_H
|
||||||
|
# define REG_H
|
||||||
|
|
||||||
|
namespace LA {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int Matern_coefficient(const unsigned int n, T* comb_number);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// The kernel regression of points in 1D (curve), 2D (surface), 3D and multi-D space
|
||||||
|
template <typename T>
|
||||||
|
class REG { // Kernel regression (REG) library
|
||||||
|
public:
|
||||||
|
// Constructor of kernel regression model from memory location
|
||||||
|
REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel);
|
||||||
|
// Constructor of kernel regression model from files
|
||||||
|
REG(const string x_file, const string y_file, const string param_file);
|
||||||
|
// Init function for constructor
|
||||||
|
void Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel);
|
||||||
|
// General kernel function
|
||||||
|
inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
|
||||||
|
// Function, that build matrix of distances between kernels
|
||||||
|
unsigned int Build_REG_distances_mat();
|
||||||
|
// Function, that build matrix of coefficients for kernel coordinates
|
||||||
|
unsigned int Build_REG_kernels_mat();
|
||||||
|
// Function, that build sums of matrix coefficients for kernel coordinates
|
||||||
|
unsigned int Build_REG_kernels_mat_sums();
|
||||||
|
// Function, that return root mean square error of model
|
||||||
|
T REG_RMSE(); // sum(delta_y)
|
||||||
|
// The Loss function
|
||||||
|
T Loss_Function(); // sum(delta_y) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
|
||||||
|
|
||||||
|
// Function, that learn kernels and set weights
|
||||||
|
unsigned int REG_Fit(const T max_Loss, const unsigned int max_learning_cycles);
|
||||||
|
// Function, that learn kernels and set weights by gradient descent method
|
||||||
|
unsigned int REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate);
|
||||||
|
// Function, that store fitted parameters of regression model to file
|
||||||
|
unsigned int REG_Save_fitted(const string model_file);
|
||||||
|
// Function, that load fitted parameters of model to file
|
||||||
|
unsigned int REG_Load_fitted(const string model_file);
|
||||||
|
// Function, that uses the kernel regression model to predict y for new data
|
||||||
|
unsigned int REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size);
|
||||||
|
// Function, that uses the kernel regression model to predict y for new data and investigate the model precision
|
||||||
|
unsigned int REG_Get_Score(const T* x, T* y_preds, const unsigned int data_size, const T* y, T* score);
|
||||||
|
// Setting parameters of kernel regression model
|
||||||
|
unsigned int REG_Set_params(const T* params, const string kernel);
|
||||||
|
// Getting parameters of kernel regression model
|
||||||
|
unsigned int REG_Get_params(T* params, string* kernel);
|
||||||
|
// Getting weight coefficients of kernels in regression model
|
||||||
|
unsigned int REG_Get_weight_coeff(T* weight);
|
||||||
|
// destructor of kernel regression model
|
||||||
|
~REG();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// string with type of kernel
|
||||||
|
string REG_kernel_type;
|
||||||
|
// number of type of kernel
|
||||||
|
unsigned int REG_kernel_id;
|
||||||
|
// Count of x items (dimensions) for kernels
|
||||||
|
unsigned int REG_count_dimensions;
|
||||||
|
// number of input x values/kernels
|
||||||
|
unsigned int REG_count_kernels;
|
||||||
|
// lambda parameter of regression
|
||||||
|
T REG_lambda;
|
||||||
|
// alpha parameter of regression
|
||||||
|
T REG_alpha;
|
||||||
|
// sigma parameter of kernels
|
||||||
|
T REG_sigma;
|
||||||
|
// n for Matern kernels
|
||||||
|
unsigned int REG_n;
|
||||||
|
// indicator of initialization or initialization errors
|
||||||
|
bool REG_base_init = false;
|
||||||
|
// indicator of initialization of allocation of memory
|
||||||
|
bool REG_aloc_init = false;
|
||||||
|
// indicator of initialization of REG_REG_distances_mat and REG_REG_kernels_mat
|
||||||
|
bool REG_mat_init = false;
|
||||||
|
|
||||||
|
// Matern kernel coefficient computed from n and k
|
||||||
|
T* REG_Matern_comb_number = nullptr;
|
||||||
|
// pointer to parameters of kernel regression, various size, first is lambda, second alpha, third sigma parameter of kernels
|
||||||
|
// and fourth the n parameter, if is used Matern kernel
|
||||||
|
T* REG_params = nullptr;
|
||||||
|
// variable which contains number of learning cycles of model
|
||||||
|
unsigned int learning_cycles = 0;
|
||||||
|
|
||||||
|
// x values of kernel regression input data
|
||||||
|
T* REG_coordinates = nullptr;
|
||||||
|
// y values of kernel regression input data
|
||||||
|
T* REG_Y = nullptr;
|
||||||
|
|
||||||
|
// matrix of distances between kernels
|
||||||
|
T* REG_distances = nullptr;
|
||||||
|
// matrix of kernel coefficients for kernel coordinates
|
||||||
|
T* REG_kernels = nullptr;
|
||||||
|
// vector of row sums of kernel coefficients for kernel coordinates
|
||||||
|
T* REG_kernels_sums = nullptr;
|
||||||
|
// vector of weight coefficients for each kernel
|
||||||
|
T* REG_weights = nullptr;
|
||||||
|
|
||||||
|
// vector of distances between kernels
|
||||||
|
T* REG_coordinates_diff = nullptr;
|
||||||
|
// vector of kernel coefficients for kernel coordinates
|
||||||
|
T* REG_kernels_diff = nullptr;
|
||||||
|
// vector of y predictions for each kernel
|
||||||
|
T* REG_y_preds = nullptr;
|
||||||
|
// vector of differences between y values and predictions for each kernel
|
||||||
|
T* REG_y_delta = nullptr;
|
||||||
|
// vector of temporary results for processing y values and predictions for each kernel
|
||||||
|
T* REG_y_temp = nullptr;
|
||||||
|
|
||||||
|
// pointer to x buffer for model loaded from file
|
||||||
|
T* REG_x_file = nullptr;
|
||||||
|
// pointer to y buffer for model loaded from file
|
||||||
|
T* REG_y_file = nullptr;
|
||||||
|
// pointer to parameters buffer for model loaded from file
|
||||||
|
T* REG_param_file = nullptr;
|
||||||
|
// pointer to fitted data for model loaded from file
|
||||||
|
T* REG_fit_file = nullptr;
|
||||||
|
|
||||||
|
// NRVec for Matern kernel coefficient computed from n and k
|
||||||
|
NRVec<T> REG_Matern_comb_number_vec;
|
||||||
|
|
||||||
|
// NRVec for matrix of distances between kernels
|
||||||
|
NRVec<T> REG_coordinates_vec;
|
||||||
|
// NRVec for vector of y imput values of kernels
|
||||||
|
NRVec<T> REG_y_vec;
|
||||||
|
// NRMat for matrix of distances between kernels
|
||||||
|
NRMat<T> REG_distances_mat;
|
||||||
|
// NRMat for matrix of kernel coefficients for kernel coordinates
|
||||||
|
NRMat<T> REG_kernels_mat;
|
||||||
|
// NRVec for vector of row sums of kernel coefficients for kernel coordinates
|
||||||
|
NRVec<T> REG_kernels_sums_vec;
|
||||||
|
// NRVec for vector of weight coefficients of kernels
|
||||||
|
NRVec<T> REG_weights_vec;
|
||||||
|
|
||||||
|
// NRVec for matrix of coordinates differences between kernels
|
||||||
|
NRVec<T> REG_coordinates_diff_vec;
|
||||||
|
// NRVec for matrix of kernel coefficients for kernel coordinates
|
||||||
|
NRVec<T> REG_kernels_diff_vec;
|
||||||
|
|
||||||
|
// NRVec for vector of y predictions of kernels
|
||||||
|
NRVec<T> REG_y_preds_vec;
|
||||||
|
// NRVec for vector of differences between estimated real and real y values for training kernels
|
||||||
|
NRVec<T> REG_y_delta_vec;
|
||||||
|
// NRVec for temporary results for processing y values and predictions for each kernel
|
||||||
|
NRVec<T> REG_y_temp_vec;
|
||||||
|
};
|
||||||
|
// Include the template classes definitions
|
||||||
|
|
||||||
|
} // end of namespace
|
||||||
|
# endif /* REG_H */
|
1526
regsurf.cc
Normal file
1526
regsurf.cc
Normal file
File diff suppressed because it is too large
Load Diff
286
regsurf.h
Normal file
286
regsurf.h
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
/*
|
||||||
|
LA: linear algebra C++ interface library
|
||||||
|
Kernel ridge regression module Copyright (C) 2024
|
||||||
|
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#ifndef REGSURF_H
|
||||||
|
#define REGSURF_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <bits/stdc++.h>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
# include "mat.h" // vector support libla implementation
|
||||||
|
# include "vec.h" // compiler parameters: -llapack -lblas
|
||||||
|
# include "reg.h"
|
||||||
|
|
||||||
|
namespace LA {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc);
|
||||||
|
|
||||||
|
|
||||||
|
// The kernel regression of surfaces of geometries in in 1D, 2D, 3D and multi-D space
|
||||||
|
// and computing of ML-NEA absorption spectra
|
||||||
|
template <typename T>
|
||||||
|
class REGSURF { // REG for potential energy surfaces (PES) and other quantities surfaces
|
||||||
|
public:
|
||||||
|
// constructor of surface REG model from memory locations
|
||||||
|
REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
|
||||||
|
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
|
||||||
|
const T* values, const unsigned int* Z = nullptr);
|
||||||
|
// constructor of surface REG model from files
|
||||||
|
REGSURF(const string geometries_file, const string energies_file, const string parameters_file, const string Z_file = "");
|
||||||
|
// inicialization function of REG surface model
|
||||||
|
void Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
|
||||||
|
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
|
||||||
|
const T* values, const unsigned int* Z);
|
||||||
|
// General kernel function
|
||||||
|
inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
|
||||||
|
// function for building matrix of geometry distances between kernels
|
||||||
|
unsigned int Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances);
|
||||||
|
// function for building descriptor matrix
|
||||||
|
unsigned int Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc);
|
||||||
|
// Function, that build matrix of coefficients for kernel coordinates
|
||||||
|
unsigned int Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec<NRMat<T>> &kernels_vec_mat);
|
||||||
|
// Function, that build sums of matrix coefficients for kernel coordinates
|
||||||
|
unsigned int Build_s_kernels_mat_sums();
|
||||||
|
// Function, that return root mean square error of model
|
||||||
|
T s_RMSE(unsigned int level); // sqrt(sum(delta_y^2)/n)
|
||||||
|
// The Loss function
|
||||||
|
T Loss_Function(unsigned int level); // sqrt(sum(delta_y^2)/n) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
|
||||||
|
|
||||||
|
unsigned int s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate,
|
||||||
|
const bool element_invariant=true, unsigned int* kernel_classes=nullptr);
|
||||||
|
// Saving weight coefficients of regression surface model into file
|
||||||
|
unsigned int s_Save_fitted(const string weights);
|
||||||
|
// Loading weight coefficients of regression surface model from file
|
||||||
|
unsigned int s_Load_fitted(const string weights);
|
||||||
|
// This function is for getting predictions from kernel potential energy surface, geometry is loaded from memory
|
||||||
|
unsigned int s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels,
|
||||||
|
const T* geom, const unsigned int* surfaces, T* predictions);
|
||||||
|
// Function, that uses the regression surface to predict y for new data and investigate the model precision
|
||||||
|
unsigned int s_Score(const unsigned int count_geometries, const unsigned int count_levels,
|
||||||
|
const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score);
|
||||||
|
// Setting parameters of regression surface models
|
||||||
|
unsigned int s_Set_params(const string descriptor, const string kernel, const T* params);
|
||||||
|
// Getting parameters of regression surface models
|
||||||
|
unsigned int s_Get_params(string* descriptor, string* kernel, T* params);
|
||||||
|
// Setting the external descriptor function for computing the descriptor matrix
|
||||||
|
unsigned int s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*));
|
||||||
|
// Getting weight coefficients of regression surface model
|
||||||
|
unsigned int s_Get_weight_coeff(const unsigned int s_number, T* weights);
|
||||||
|
|
||||||
|
// Loading geometries for ML-NEA calculations from memory
|
||||||
|
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries);
|
||||||
|
// Loading geometries for ML-NEA calculations from file
|
||||||
|
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file);
|
||||||
|
// Calculation of energies for ML-NEA surfaces of geometries for all energy levels - ground and excited states
|
||||||
|
unsigned int s_Predict_ML_NEA_geometries();
|
||||||
|
// Calculation of ML-NEA energy spectra
|
||||||
|
unsigned int s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta);
|
||||||
|
// Saving the normalized ML-NEA energy spectra to file
|
||||||
|
unsigned int s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file);
|
||||||
|
// destructor of surface REG model
|
||||||
|
~REGSURF();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// string with setted type of kernels
|
||||||
|
string s_kernel_type;
|
||||||
|
// identification number of setted type of kernel
|
||||||
|
unsigned int s_kernel_id;
|
||||||
|
// string with selected type of descriptor for surface
|
||||||
|
string s_descriptor_type;
|
||||||
|
// identification number of selected type of descriptor
|
||||||
|
unsigned int s_descriptor_id;
|
||||||
|
|
||||||
|
// dimension of kernels in surface geometry
|
||||||
|
unsigned int s_count_dimensions;
|
||||||
|
// count of kernels in surface geometry
|
||||||
|
unsigned int s_count_kernels;
|
||||||
|
// count of geometries in surface
|
||||||
|
unsigned int s_count_geometries;
|
||||||
|
// count of levels/surfaces
|
||||||
|
unsigned int s_count_levels;
|
||||||
|
// lambda parameter of regression
|
||||||
|
T* s_lambda = nullptr;
|
||||||
|
// alpha parameter of regression
|
||||||
|
T* s_alpha = nullptr;
|
||||||
|
// sigma parameter of kernels
|
||||||
|
T s_sigma;
|
||||||
|
// n for Matern kernels
|
||||||
|
unsigned int s_n;
|
||||||
|
// count of geometries for ML-NEA model
|
||||||
|
T ML_NEA_count_geometries;
|
||||||
|
// minimum of energy in begin of s_ML_NEA_spectra_vec (atomic units)
|
||||||
|
T ML_NEA_energy_min;
|
||||||
|
// the energy step in s_ML_NEA_spectra_vec (atomic units)
|
||||||
|
T ML_NEA_energy_step;
|
||||||
|
|
||||||
|
// indicator of initialization of allocation of memory
|
||||||
|
bool s_aloc_init = false;
|
||||||
|
// indicator of initialization or initialization errors
|
||||||
|
bool s_base_init = false;
|
||||||
|
// indicator of initialization of s_distances_mat
|
||||||
|
bool s_distances_mat_init = false;
|
||||||
|
// indicator of initialization of s_descriptors_mat
|
||||||
|
bool s_descriptors_mat_init = false;
|
||||||
|
// indicator of initialization of s_kernels_mat
|
||||||
|
bool s_kernels_mat_init = false;
|
||||||
|
// indicator of using of extern descriptor functions
|
||||||
|
bool extern_descriptor = false;
|
||||||
|
// pointer to external descriptor function
|
||||||
|
unsigned int (*extern_desc)(const unsigned int, const unsigned int, const T*, T*) = nullptr;
|
||||||
|
|
||||||
|
// indicator of loading of ML_NEA input data
|
||||||
|
bool s_ML_NEA_input_init = false;
|
||||||
|
// indicator of computing of ML_NEA surfaces
|
||||||
|
bool s_ML_NEA_surf_init = false;
|
||||||
|
// indicator of computing of ML_NEA spectra
|
||||||
|
bool s_ML_NEA_spectra_init = false;
|
||||||
|
|
||||||
|
// the surface kernel parameters pointer, sigma parameter is first
|
||||||
|
T* s_params = nullptr;
|
||||||
|
// Matern kernel coefficient computed from n and k
|
||||||
|
T* s_Matern_comb_number = nullptr;
|
||||||
|
// numbers of geometries with minimum of y for levels [s_count_levels]
|
||||||
|
unsigned int* s_y_min = nullptr;
|
||||||
|
// The geometries data of each geometries [s_kernel_dimension * s_count_kernels * s_count_geometries]
|
||||||
|
T* s_geometries = nullptr;
|
||||||
|
// The surfaces y data values of each surfaces [s_count_geometries * s_count_surf]
|
||||||
|
T* s_y = nullptr;
|
||||||
|
// The surfaces Z proton numbers of atoms [s_count_kernels]
|
||||||
|
unsigned int* s_Z = nullptr;
|
||||||
|
// the sum of sums of kernel matrix for base geometry
|
||||||
|
T s_kernels_sum_sum;
|
||||||
|
|
||||||
|
// matrix of coordinate distances between kernels for each geometry [s_count_kernels * s_count_kernels * s_count_geometries]
|
||||||
|
T* s_distances = nullptr;
|
||||||
|
// descriptor matrices for each geometry [s_kernel_dimension * s_kernel_dimension * s_count_geometries]
|
||||||
|
T* s_desc = nullptr;
|
||||||
|
// matrices of kernel coefficients for each surface kernel geometry [s_count_kernels * s_count_kernels]
|
||||||
|
T* s_kernels = nullptr;
|
||||||
|
// sums of kernels rows for geometries [s_count_kernels]
|
||||||
|
T* s_kernels_rows_sums;
|
||||||
|
// matrix of differences of sum kernel rows between geometries [s_count_kernels]
|
||||||
|
T* s_kernels_diff;
|
||||||
|
// vector differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
|
||||||
|
T* s_kernels_diff_sums;
|
||||||
|
// vectors of weight coefficients for each surface kernel and surface [s_count_kernels]
|
||||||
|
T* s_weights = nullptr;
|
||||||
|
|
||||||
|
// matrices of surface y energy/quantity predictions for each geometry and surface [s_count_geometries]
|
||||||
|
T* s_y_preds = nullptr;
|
||||||
|
// the surfaces difference y data values of each surfaces [s_count_geometries]
|
||||||
|
T* s_y_delta = nullptr;
|
||||||
|
// the surfaces temporary y data values of each surfaces [s_count_geometries]
|
||||||
|
T* s_y_temp = nullptr;
|
||||||
|
// the surfaces temporary y partial data values of each surfaces [count_kernels]
|
||||||
|
T* s_y_temp_part = nullptr;
|
||||||
|
// coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
|
||||||
|
T* s_distances_pred;
|
||||||
|
// matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
|
||||||
|
T* s_desc_pred;
|
||||||
|
// vector of matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
|
||||||
|
T* s_kernels_pred;
|
||||||
|
|
||||||
|
// pointer to x buffer for model loaded from file
|
||||||
|
T* s_geom_file = nullptr;
|
||||||
|
// pointer to y buffer for model loaded from file
|
||||||
|
T* s_y_file = nullptr;
|
||||||
|
// pointer to parameters buffer for model loaded from file
|
||||||
|
T* s_param_file = nullptr;
|
||||||
|
// pointer to fitted data for model loaded from file
|
||||||
|
T* s_fit_file = nullptr;
|
||||||
|
// pointer to fitted data for model loaded from Z file
|
||||||
|
unsigned int* s_Z_file = nullptr;
|
||||||
|
// pointer to ML-NEA geometries buffer for model loaded from file
|
||||||
|
T* s_ML_NEA_geom_file = nullptr;
|
||||||
|
|
||||||
|
// pointer to geometries loaded to ML_NEA model [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
|
||||||
|
T* s_ML_NEA_geometries = nullptr;
|
||||||
|
// ML-NEA surfaces numbers
|
||||||
|
unsigned int* s_ML_NEA_surf_numbers;
|
||||||
|
// pointer to y values computed for ML_NEA model [ML_NEA_count_geometries, s_count_levels]
|
||||||
|
T* s_ML_NEA_y = nullptr;
|
||||||
|
// pointer to minimal y values in geometries computed for ML_NEA model [s_count_levels]
|
||||||
|
T* s_ML_NEA_min_y = nullptr;
|
||||||
|
// pointer to spectra computed for ML_NEA model [count_steps + 1]
|
||||||
|
T* s_ML_NEA_spectra = nullptr;
|
||||||
|
|
||||||
|
// NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, count_geometries]
|
||||||
|
NRMat<T> s_geometries_mat;
|
||||||
|
// NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels]
|
||||||
|
NRMat<T> s_y_mat;
|
||||||
|
// NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels]
|
||||||
|
NRVec<unsigned int> s_Z_vec;
|
||||||
|
|
||||||
|
// NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries]
|
||||||
|
NRMat<T> s_distances_mat;
|
||||||
|
// NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries]
|
||||||
|
NRMat<T> s_desc_mat;
|
||||||
|
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry
|
||||||
|
// [count_geometries, count_kernels * count_kernels]
|
||||||
|
NRVec<NRMat<T>> s_kernels_vec_mat;
|
||||||
|
// NRVec of NRVec for sums of kernels rows for geometries [s_count_kernels * s_count_geometries]
|
||||||
|
NRVec<NRVec<T>> s_kernels_rows_sums_vec_vec;
|
||||||
|
// NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries]
|
||||||
|
NRVec<NRVec<T>> s_kernels_diff_vec_vec;
|
||||||
|
// NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
|
||||||
|
NRVec<T> s_kernels_diff_sums_vec;
|
||||||
|
// NRVec of NRVec for vectors of weight coefficients of surface kernels in surface [s_count_kernels, s_count_levels]
|
||||||
|
NRVec<NRVec<T>> s_weights_vec_vec;
|
||||||
|
// NRVec for vector of y of geomeries for surface [s_count_geometries]
|
||||||
|
NRVec<T> s_y_vec;
|
||||||
|
|
||||||
|
// NRVec for vector of y predictions of geomeries for surface [s_count_geometries]
|
||||||
|
NRVec<T> s_y_preds_vec;
|
||||||
|
// NRVec for vector of delta y values for surface [s_count_geometries]
|
||||||
|
NRVec<T> s_y_delta_vec;
|
||||||
|
// NRVec for vector of temporary y values for surface [s_count_geometries]
|
||||||
|
NRVec<T> s_y_temp_vec;
|
||||||
|
// NRVec for vector of temporary partial y values for geometry [s_count_kernels]
|
||||||
|
NRVec<T> s_y_temp_part_vec;
|
||||||
|
// NRMat for coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
|
||||||
|
NRMat<T> s_distances_pred_mat;
|
||||||
|
// NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
|
||||||
|
NRMat<T> s_desc_pred_mat;
|
||||||
|
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
|
||||||
|
NRVec<NRMat<T>> s_kernels_pred_vec_mat;
|
||||||
|
|
||||||
|
// NRMat for ML-NEA geometries [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
|
||||||
|
NRMat<T> s_ML_NEA_geometries_mat;
|
||||||
|
// NRVec for ML-NEA surfaces numbers
|
||||||
|
NRVec<unsigned int> s_ML_NEA_surf_numbers_vec;
|
||||||
|
// NRMat for ML-NEA energies [s_count_levels, ML_NEA_count_geometries]
|
||||||
|
NRMat<T> s_ML_NEA_y_mat;
|
||||||
|
// NRVec of minimal y values in geometries computed for ML_NEA model [s_count_levels]
|
||||||
|
NRVec<T> s_ML_NEA_min_y_vec;
|
||||||
|
// NRVec for ML-NEA energy spectra [count_steps + 1]
|
||||||
|
NRVec<T> s_ML_NEA_spectra_vec;
|
||||||
|
};
|
||||||
|
} // end of namespace
|
||||||
|
# endif /* REG_H */
|
131
test_reg.cc
Normal file
131
test_reg.cc
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
/*
|
||||||
|
LA: linear algebra C++ interface library
|
||||||
|
Kernel ridge regression module Copyright (C) 2024
|
||||||
|
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "reg.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace LA;
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
// unsigned int i;
|
||||||
|
|
||||||
|
double x;
|
||||||
|
double y;
|
||||||
|
bool end = false;
|
||||||
|
string user_input;
|
||||||
|
string model_x = "reg_test_x.csv";
|
||||||
|
string model_y = "reg_test_y.csv";
|
||||||
|
string model_p = "reg_test_p.csv";
|
||||||
|
REG<double> reg(model_x, model_y, model_p);
|
||||||
|
/*
|
||||||
|
double* reference_value;
|
||||||
|
double* score;
|
||||||
|
double RMSE;
|
||||||
|
double Loss;
|
||||||
|
double* params;
|
||||||
|
double* weights;
|
||||||
|
string kernel;
|
||||||
|
|
||||||
|
reference_value = new double[1];
|
||||||
|
score = new double[1];
|
||||||
|
params = new double[4];
|
||||||
|
weights = new double[451];
|
||||||
|
|
||||||
|
reference_value[0] = 1.00;
|
||||||
|
*/
|
||||||
|
// creating model
|
||||||
|
// fitting model
|
||||||
|
reg.REG_Fit(0.0, 20);
|
||||||
|
//reg.REG_Fit_grad(0.1, 50, 0.2);
|
||||||
|
// get predictions
|
||||||
|
|
||||||
|
cout << "Program for testing regression model \n" << endl;
|
||||||
|
/*
|
||||||
|
reg.REG_Get_weight_coeff(weights);
|
||||||
|
cout << "The weight coefficients are: " << kernel << endl;
|
||||||
|
for (i = 0; i < 451; i++)
|
||||||
|
cout << weights[i] << endl;
|
||||||
|
*/
|
||||||
|
while (not end)
|
||||||
|
{
|
||||||
|
cout << "Please enter the x value or y to end \n" << endl;
|
||||||
|
cin >> user_input;
|
||||||
|
if (user_input == "y")
|
||||||
|
{
|
||||||
|
end = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
x = stod(user_input);
|
||||||
|
|
||||||
|
reg.REG_Get_predictions(&x, &y, 1);
|
||||||
|
cout << "The model prediction is: " << y << endl;
|
||||||
|
/*
|
||||||
|
reg.REG_Get_Score(&x, &y, 1, reference_value, score);
|
||||||
|
cout << "The model prediction is: " << y << endl;
|
||||||
|
cout << "The reference value is: " << reference_value[0] << endl;
|
||||||
|
cout << "The score is: " << score[0] << endl;
|
||||||
|
|
||||||
|
RMSE = reg.REG_RMSE();
|
||||||
|
Loss = reg.Loss_Function();
|
||||||
|
cout << "The RMSE is: " << RMSE << endl;
|
||||||
|
cout << "The Loss is: " << Loss << endl;
|
||||||
|
|
||||||
|
reg.REG_Get_params(params, &kernel);
|
||||||
|
cout << "The parameters of model are: " << endl;
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
cout << "param: " << params[i] << endl;
|
||||||
|
|
||||||
|
cout << "The type of kernel is: " << kernel << endl;
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
params[0] = 0;
|
||||||
|
kernel = "Gaussian";
|
||||||
|
reg.REG_Set_params(params, kernel);
|
||||||
|
|
||||||
|
reg.REG_Get_params(params, &kernel);
|
||||||
|
cout << "The new parameters o model are: " << RMSE << endl;
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
cout << "param: " << params[i] << endl;
|
||||||
|
|
||||||
|
cout << "The new type of kernel is: " << kernel << endl;
|
||||||
|
cout << endl;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
/*reg.REG_Save_fitted("reg_test_m.csv");
|
||||||
|
reg.REG_Load_fitted("reg_test_m.csv");
|
||||||
|
end = false;
|
||||||
|
while (not end)
|
||||||
|
{
|
||||||
|
cout << "Please enter the x value or y to end \n" << endl;
|
||||||
|
cin >> user_input;
|
||||||
|
if (user_input == "y")
|
||||||
|
{
|
||||||
|
end = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
x = stod(user_input);
|
||||||
|
reg.REG_Get_predictions(&x, &y, 1);
|
||||||
|
cout << "The model prediction is: " << y << endl;
|
||||||
|
}*/
|
||||||
|
return(0);
|
||||||
|
};
|
118
test_regsurf.cc
Normal file
118
test_regsurf.cc
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
/*
|
||||||
|
LA: linear algebra C++ interface library
|
||||||
|
Kernel ridge regression module Copyright (C) 2024
|
||||||
|
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "regsurf.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace LA;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
unsigned int i, j;
|
||||||
|
unsigned int count_levels;
|
||||||
|
unsigned int count_kernels;
|
||||||
|
double x;
|
||||||
|
double y;
|
||||||
|
double RMSE;
|
||||||
|
double Loss;
|
||||||
|
bool end = false;
|
||||||
|
string user_input;
|
||||||
|
string model_geoms = "xyz.dat";
|
||||||
|
string model_y_values = "E_sum.dat";
|
||||||
|
string model_params = "conf_surf.dat";
|
||||||
|
string model_Z_file = "Z.dat";
|
||||||
|
string model_test_geom = "nea_geoms.xyz";
|
||||||
|
/*
|
||||||
|
double* weight_coeff;
|
||||||
|
double* params;
|
||||||
|
string model_weight_file;
|
||||||
|
string* descriptor;
|
||||||
|
string* kernel;
|
||||||
|
*/
|
||||||
|
count_levels = 10;
|
||||||
|
count_kernels = 12;/*
|
||||||
|
descriptor = new string[1];
|
||||||
|
kernel = new string[1];
|
||||||
|
|
||||||
|
weight_coeff = new double[count_kernels * count_levels];
|
||||||
|
params = new double[22];
|
||||||
|
model_weight_file = "weights.dat";
|
||||||
|
*/
|
||||||
|
REGSURF<double> regsurf(model_geoms, model_y_values, model_params, model_Z_file);
|
||||||
|
// creating model
|
||||||
|
// fitting model
|
||||||
|
regsurf.s_Fit(0.0, 100, 0.5);
|
||||||
|
//reg.REG_Fit_grad(0.1, 50, 0.2);
|
||||||
|
|
||||||
|
// get predictions
|
||||||
|
|
||||||
|
cout << "Program for testing regression model \n" << endl;
|
||||||
|
cout << "The root mean square errors: " << endl;
|
||||||
|
for (i = 0; i < count_levels; i++)
|
||||||
|
{
|
||||||
|
RMSE = regsurf.s_RMSE(i);
|
||||||
|
cout << RMSE << endl;
|
||||||
|
}
|
||||||
|
cout << "The values of Loss_function: " << endl;
|
||||||
|
for (i = 0; i < count_levels; i++)
|
||||||
|
{
|
||||||
|
Loss = regsurf.Loss_Function(i);
|
||||||
|
cout << Loss << endl;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
cout << "The weights of kernels: " << endl;
|
||||||
|
for (i = 0; i < count_levels; i++)
|
||||||
|
{
|
||||||
|
cout << "Level " << i << ": ";
|
||||||
|
regsurf.s_Get_weight_coeff(i, weight_coeff + (i * count_kernels));
|
||||||
|
for (j = 0; j < count_kernels; j++)
|
||||||
|
cout << weight_coeff[(i * count_kernels) + j] << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
regsurf.s_Save_fitted(model_weight_file);
|
||||||
|
regsurf.s_Load_fitted(model_weight_file);
|
||||||
|
regsurf.s_Get_params(descriptor, kernel, params);
|
||||||
|
|
||||||
|
cout << "The descriptor is: " << descriptor[0] << endl;
|
||||||
|
cout << "The kernel is: " << kernel[0] << endl;
|
||||||
|
cout << "The parameters of model are: ";
|
||||||
|
for (i = 0; i < (2 * count_levels) + 1; i++)
|
||||||
|
cout << params[i] << endl;
|
||||||
|
|
||||||
|
descriptor[0] = "Inverse_pairs";
|
||||||
|
kernel[0] = "Laplacian";
|
||||||
|
params[1] = 0.2;
|
||||||
|
regsurf.s_Set_params(descriptor[0], kernel[0], params);
|
||||||
|
|
||||||
|
regsurf.s_Get_params(descriptor, kernel, params);
|
||||||
|
cout << "The new descriptor is: " << descriptor[0] << endl;
|
||||||
|
cout << "The new kernel is: " << kernel[0] << endl;
|
||||||
|
cout << "The new parameters of model are: ";
|
||||||
|
for (i = 0; i < (2 * count_levels) + 1; i++)
|
||||||
|
cout << params[i] << endl;
|
||||||
|
*/
|
||||||
|
regsurf.s_Load_ML_NEA_geometries(50000, "nea_geoms.xyz");
|
||||||
|
regsurf.s_Predict_ML_NEA_geometries();
|
||||||
|
regsurf.s_Compute_ML_NEA_spectra(0.00, 0.0025, 4000, 0.2);
|
||||||
|
regsurf.s_Save_ML_NEA_spectra("spectra.dat");
|
||||||
|
return(0);
|
||||||
|
};
|
Loading…
Reference in New Issue
Block a user