287 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			287 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
    LA: linear algebra C++ interface library
 | 
						|
    Kernel ridge regression module Copyright (C) 2024 
 | 
						|
    Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
 | 
						|
 | 
						|
    This program is free software: you can redistribute it and/or modify
 | 
						|
    it under the terms of the GNU General Public License as published by
 | 
						|
    the Free Software Foundation, either version 3 of the License, or
 | 
						|
    (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU General Public License
 | 
						|
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
*/
 | 
						|
#ifndef REGSURF_H 
 | 
						|
#define REGSURF_H
 | 
						|
 | 
						|
#include <iostream>
 | 
						|
#include <string>
 | 
						|
#include <cmath>
 | 
						|
#include <bits/stdc++.h>
 | 
						|
#include <fstream>
 | 
						|
 | 
						|
using namespace std;
 | 
						|
 | 
						|
# include "mat.h" // vector support libla implementation
 | 
						|
# include "vec.h" // compiler parameters: -llapack -lblas
 | 
						|
# include "reg.h"
 | 
						|
 | 
						|
namespace LA {
 | 
						|
 | 
						|
template <typename T>
 | 
						|
unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry);
 | 
						|
 | 
						|
template <typename T>
 | 
						|
unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z);
 | 
						|
 | 
						|
template <typename T>
 | 
						|
unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc);
 | 
						|
 | 
						|
 | 
						|
// The kernel regression of surfaces of geometries in in 1D, 2D, 3D and multi-D space
 | 
						|
// and computing of ML-NEA absorption spectra
 | 
						|
template <typename T>
 | 
						|
class REGSURF { // REG for potential energy surfaces (PES) and other quantities surfaces
 | 
						|
public:
 | 
						|
    // constructor of surface REG model from memory locations
 | 
						|
    REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
 | 
						|
    const unsigned int count_levels, const string descriptor, const string kernel, const T* params,  const T* geometries,
 | 
						|
    const T* values, const unsigned int* Z = nullptr);
 | 
						|
    // constructor of surface REG model from files
 | 
						|
    REGSURF(const string geometries_file, const string energies_file, const string parameters_file, const string Z_file = "");
 | 
						|
    // inicialization function of REG surface model
 | 
						|
    void Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
 | 
						|
    const unsigned int count_levels, const string descriptor, const string kernel, const T* params,  const T* geometries,
 | 
						|
    const T* values, const unsigned int* Z);
 | 
						|
    // General kernel function
 | 
						|
    inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
 | 
						|
    // function for building matrix of geometry distances between kernels
 | 
						|
    unsigned int Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances);
 | 
						|
    // function for building descriptor matrix
 | 
						|
    unsigned int Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc);
 | 
						|
    // Function, that build matrix of coefficients for kernel coordinates
 | 
						|
    unsigned int Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec<NRMat<T>> &kernels_vec_mat);
 | 
						|
    // Function, that build sums of matrix coefficients for kernel coordinates
 | 
						|
    unsigned int Build_s_kernels_mat_sums();
 | 
						|
    // Function, that return root mean square error of model
 | 
						|
    T s_RMSE(unsigned int level); // sqrt(sum(delta_y^2)/n)
 | 
						|
    // The Loss function
 | 
						|
    T Loss_Function(unsigned int level); // sqrt(sum(delta_y^2)/n) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
 | 
						|
    
 | 
						|
    unsigned int  s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate,
 | 
						|
    const bool element_invariant=true, unsigned int* kernel_classes=nullptr);
 | 
						|
    // Saving weight coefficients of regression surface model into file
 | 
						|
    unsigned int  s_Save_fitted(const string weights);
 | 
						|
    // Loading weight coefficients of regression surface model from file
 | 
						|
    unsigned int  s_Load_fitted(const string weights);
 | 
						|
    // This function is for getting predictions from kernel potential energy surface, geometry is loaded from memory
 | 
						|
    unsigned int  s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels,
 | 
						|
    const T* geom, const unsigned int* surfaces, T* predictions);
 | 
						|
    // Function, that uses the regression surface to predict y for new data and investigate the model precision
 | 
						|
    unsigned int  s_Score(const unsigned int count_geometries, const unsigned int count_levels,
 | 
						|
    const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score);
 | 
						|
    // Setting parameters of regression surface models
 | 
						|
    unsigned int  s_Set_params(const string descriptor, const string kernel, const T* params);
 | 
						|
    // Getting parameters of regression surface models
 | 
						|
    unsigned int  s_Get_params(string* descriptor, string* kernel, T* params);
 | 
						|
    // Setting the external descriptor function for computing the descriptor matrix
 | 
						|
    unsigned int s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*));
 | 
						|
    // Getting weight coefficients of regression surface model
 | 
						|
    unsigned int  s_Get_weight_coeff(const unsigned int s_number, T* weights);
 | 
						|
        
 | 
						|
    // Loading geometries for ML-NEA calculations from memory
 | 
						|
    unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries);
 | 
						|
    // Loading geometries for ML-NEA calculations from file
 | 
						|
    unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file);
 | 
						|
    // Calculation of energies for ML-NEA surfaces of geometries for all energy levels - ground and excited states
 | 
						|
    unsigned int s_Predict_ML_NEA_geometries();
 | 
						|
    // Calculation of ML-NEA energy spectra
 | 
						|
    unsigned int s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta);
 | 
						|
    // Saving the normalized ML-NEA energy spectra to file
 | 
						|
    unsigned int s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file);
 | 
						|
    // destructor of surface REG model 
 | 
						|
    ~REGSURF();
 | 
						|
 | 
						|
protected:
 | 
						|
    // string with setted type of kernels
 | 
						|
    string  s_kernel_type;
 | 
						|
    // identification number of setted type of kernel
 | 
						|
    unsigned int s_kernel_id;
 | 
						|
    // string with selected type of descriptor for surface
 | 
						|
    string  s_descriptor_type;
 | 
						|
    // identification number of selected type of descriptor
 | 
						|
    unsigned int  s_descriptor_id;
 | 
						|
    
 | 
						|
    // dimension of kernels in surface geometry
 | 
						|
    unsigned int  s_count_dimensions;
 | 
						|
    // count of kernels in surface geometry
 | 
						|
    unsigned int  s_count_kernels;
 | 
						|
    // count of geometries in surface
 | 
						|
    unsigned int  s_count_geometries;
 | 
						|
    // count of levels/surfaces
 | 
						|
    unsigned int  s_count_levels;
 | 
						|
    // lambda parameter of regression
 | 
						|
    T* s_lambda = nullptr;
 | 
						|
    // alpha parameter of regression
 | 
						|
    T* s_alpha = nullptr;
 | 
						|
    // sigma parameter of kernels
 | 
						|
    T s_sigma;
 | 
						|
    // n for Matern kernels
 | 
						|
    unsigned int s_n;
 | 
						|
    // count of geometries for ML-NEA model
 | 
						|
    T ML_NEA_count_geometries;
 | 
						|
    // minimum of energy in begin of s_ML_NEA_spectra_vec (atomic units)
 | 
						|
    T ML_NEA_energy_min;
 | 
						|
    // the energy step in s_ML_NEA_spectra_vec (atomic units)
 | 
						|
    T ML_NEA_energy_step;
 | 
						|
    
 | 
						|
    // indicator of initialization of allocation of memory
 | 
						|
    bool s_aloc_init = false;
 | 
						|
    // indicator of initialization or initialization errors
 | 
						|
    bool s_base_init = false;
 | 
						|
    // indicator of initialization of s_distances_mat 
 | 
						|
    bool s_distances_mat_init = false;
 | 
						|
    // indicator of initialization of s_descriptors_mat 
 | 
						|
    bool s_descriptors_mat_init = false;
 | 
						|
    // indicator of initialization of s_kernels_mat
 | 
						|
    bool s_kernels_mat_init = false;
 | 
						|
    // indicator of using of extern descriptor functions
 | 
						|
    bool extern_descriptor = false;
 | 
						|
    // pointer to external descriptor function
 | 
						|
    unsigned int (*extern_desc)(const unsigned int, const unsigned int, const T*, T*) = nullptr;
 | 
						|
    
 | 
						|
    // indicator of loading of ML_NEA input data
 | 
						|
    bool s_ML_NEA_input_init = false;
 | 
						|
    // indicator of computing of ML_NEA surfaces
 | 
						|
    bool s_ML_NEA_surf_init = false;
 | 
						|
    // indicator of computing of ML_NEA spectra
 | 
						|
    bool s_ML_NEA_spectra_init = false;
 | 
						|
    
 | 
						|
    // the surface kernel parameters pointer, sigma parameter is first
 | 
						|
    T*  s_params = nullptr;
 | 
						|
    // Matern kernel coefficient computed from n and k
 | 
						|
    T* s_Matern_comb_number = nullptr;
 | 
						|
    // numbers of geometries with minimum of y for levels [s_count_levels]
 | 
						|
    unsigned int* s_y_min = nullptr;
 | 
						|
    // The geometries data of each geometries [s_kernel_dimension * s_count_kernels * s_count_geometries]
 | 
						|
    T* s_geometries = nullptr;
 | 
						|
    // The surfaces y data values of each surfaces [s_count_geometries * s_count_surf]
 | 
						|
    T* s_y = nullptr;
 | 
						|
    // The surfaces Z proton numbers of atoms [s_count_kernels]
 | 
						|
    unsigned int* s_Z = nullptr;
 | 
						|
    // the sum of sums of kernel matrix for base geometry
 | 
						|
    T  s_kernels_sum_sum;
 | 
						|
    
 | 
						|
    // matrix of coordinate distances between kernels for each geometry [s_count_kernels * s_count_kernels * s_count_geometries]
 | 
						|
    T* s_distances = nullptr;
 | 
						|
    // descriptor matrices for each geometry [s_kernel_dimension * s_kernel_dimension * s_count_geometries]
 | 
						|
    T* s_desc = nullptr;
 | 
						|
    // matrices of kernel coefficients for each surface kernel geometry [s_count_kernels * s_count_kernels]
 | 
						|
    T* s_kernels = nullptr;
 | 
						|
    // sums of kernels rows for geometries [s_count_kernels]
 | 
						|
    T* s_kernels_rows_sums;
 | 
						|
    // matrix of differences of sum kernel rows between geometries [s_count_kernels]
 | 
						|
    T* s_kernels_diff;
 | 
						|
    // vector differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
 | 
						|
    T* s_kernels_diff_sums;
 | 
						|
    // vectors of weight coefficients for each surface kernel and surface [s_count_kernels]
 | 
						|
    T* s_weights = nullptr;
 | 
						|
    
 | 
						|
    // matrices of surface y energy/quantity predictions for each geometry and surface [s_count_geometries]
 | 
						|
    T*  s_y_preds = nullptr;
 | 
						|
    // the surfaces difference y data values of each surfaces [s_count_geometries]
 | 
						|
    T*  s_y_delta = nullptr;
 | 
						|
    // the surfaces temporary y data values of each surfaces [s_count_kernels]
 | 
						|
    T*  s_y_temp = nullptr;
 | 
						|
    // the surfaces temporary y partial data values of each surfaces [count_kernels]
 | 
						|
    T*  s_y_temp_part = nullptr;
 | 
						|
    // coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
 | 
						|
    T* s_distances_pred;
 | 
						|
    // matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
 | 
						|
    T* s_desc_pred;
 | 
						|
    // vector of matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
 | 
						|
    T* s_kernels_pred;
 | 
						|
    
 | 
						|
    // pointer to x buffer for model loaded from file
 | 
						|
    T* s_geom_file = nullptr;
 | 
						|
    // pointer to y buffer for model loaded from file
 | 
						|
    T* s_y_file = nullptr;
 | 
						|
    // pointer to parameters buffer for model loaded from file
 | 
						|
    T* s_param_file = nullptr;
 | 
						|
    // pointer to fitted data for model loaded from file
 | 
						|
    T* s_fit_file = nullptr;
 | 
						|
    // pointer to fitted data for model loaded from Z file
 | 
						|
    unsigned int* s_Z_file = nullptr;
 | 
						|
    // pointer to ML-NEA geometries buffer for model loaded from file
 | 
						|
    T* s_ML_NEA_geom_file = nullptr;
 | 
						|
    
 | 
						|
    // pointer to geometries loaded to ML_NEA model [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
 | 
						|
    T* s_ML_NEA_geometries = nullptr;
 | 
						|
    // ML-NEA surfaces numbers
 | 
						|
    unsigned int* s_ML_NEA_surf_numbers;
 | 
						|
    // pointer to y values computed for ML_NEA model [ML_NEA_count_geometries, s_count_levels]
 | 
						|
    T* s_ML_NEA_y = nullptr;
 | 
						|
    // pointer to minimal y values in geometries computed for ML_NEA model [s_count_levels]
 | 
						|
    T* s_ML_NEA_min_y = nullptr;
 | 
						|
    // pointer to spectra computed for ML_NEA model [count_steps + 1]
 | 
						|
    T* s_ML_NEA_spectra = nullptr;
 | 
						|
    
 | 
						|
    // NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, count_geometries]
 | 
						|
    NRMat<T> s_geometries_mat;
 | 
						|
    // NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels]
 | 
						|
    NRMat<T>  s_y_mat;
 | 
						|
    // NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels]
 | 
						|
    NRVec<unsigned int>  s_Z_vec;
 | 
						|
    
 | 
						|
    // NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries]
 | 
						|
    NRMat<T> s_distances_mat;
 | 
						|
    // NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries]
 | 
						|
    NRMat<T> s_desc_mat;
 | 
						|
    // NRVec of NRMat for matrix of kernel coefficients for kernels in geometry
 | 
						|
    // [count_geometries, count_kernels * count_kernels]
 | 
						|
    NRVec<NRMat<T>> s_kernels_vec_mat;
 | 
						|
    // NRVec of NRVec for sums of kernels rows for geometries [s_count_kernels * s_count_geometries]
 | 
						|
    NRVec<NRVec<T>> s_kernels_rows_sums_vec_vec;
 | 
						|
    // NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries]
 | 
						|
    NRVec<NRVec<T>> s_kernels_diff_vec_vec;
 | 
						|
    // NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
 | 
						|
    NRVec<T> s_kernels_diff_sums_vec;
 | 
						|
    // NRVec of NRVec for vectors of weight coefficients of surface kernels in surface  [s_count_kernels, s_count_levels]
 | 
						|
    NRVec<NRVec<T>> s_weights_vec_vec;
 | 
						|
    // NRVec for vector of y of geomeries for surface [s_count_geometries]
 | 
						|
    NRVec<T> s_y_vec;
 | 
						|
    
 | 
						|
    // NRVec for vector of y predictions of geomeries for surface [s_count_geometries]
 | 
						|
    NRVec<T> s_y_preds_vec;
 | 
						|
    // NRVec for vector of delta y values for surface [s_count_geometries]
 | 
						|
    NRVec<T> s_y_delta_vec;
 | 
						|
    // NRVec for vector of temporary y values for surface [s_count_kernels]
 | 
						|
    NRVec<T> s_y_temp_vec;
 | 
						|
    // NRVec for vector of temporary partial y values for geometry [s_count_kernels]
 | 
						|
    NRVec<T> s_y_temp_part_vec;
 | 
						|
    // NRMat for coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
 | 
						|
    NRMat<T> s_distances_pred_mat;
 | 
						|
    // NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
 | 
						|
    NRMat<T> s_desc_pred_mat;
 | 
						|
    // NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
 | 
						|
    NRVec<NRMat<T>> s_kernels_pred_vec_mat;
 | 
						|
    
 | 
						|
    // NRMat for ML-NEA geometries [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
 | 
						|
    NRMat<T> s_ML_NEA_geometries_mat;
 | 
						|
    // NRVec for ML-NEA surfaces numbers
 | 
						|
    NRVec<unsigned int> s_ML_NEA_surf_numbers_vec;
 | 
						|
    // NRMat for ML-NEA energies [s_count_levels, ML_NEA_count_geometries]
 | 
						|
    NRMat<T> s_ML_NEA_y_mat;
 | 
						|
    // NRVec of minimal y values in geometries computed for ML_NEA model [s_count_levels]
 | 
						|
    NRVec<T> s_ML_NEA_min_y_vec;
 | 
						|
    // NRVec for ML-NEA energy spectra [count_steps + 1]
 | 
						|
    NRVec<T> s_ML_NEA_spectra_vec;
 | 
						|
};
 | 
						|
} // end of namespace
 | 
						|
# endif /* REG_H */
 |