#pragma once

// [bl-turb.h]: boundary-layer turbulence statistics and budgets
//
// -------------------------------------------------------------------------------------------- //

#include "wstgrid3d.h"

//
// *[Note]: we need gcz >= 2 for computation of production terms:
//         _            _
//        dU           dV
// - w'w' -- ,  - w'w' --
//        dz           dz
//


namespace nse
{
	// Friction velocity (get)
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	T dynamic_velocity(T* U_z,
		const T Umax, const T c_kinematic_viscosity, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// Gradients
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void c_gradient_z(T* Grad,		// node: [W]
		const T* const C,			// node: [C]

		const nse_const3d::axisType axis, const wstGrid3d< T >& grid);	// [axisZ || axisYZ]

	template< typename T >
	void w_gradient_z(T* Grad,		// node: [C]
		const T* const W,			// node: [W]

		const nse_const3d::axisType axis, const wstGrid3d< T >& grid);	// [axisZ || axisYZ]
	// -------------------------------------------------------------------------------------------- //

	// Momentum eq. balance
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void momentum_eq(
		T* momentum_balance,			// node: [W]
		T* turbulent_momentum_flux,		// node: [W]
		T* viscous_stress,				// node: [W]

		const T* const uw_flux,			// node: [W]
		const T* const U_grad,			// node: [W]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE structure
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void TKE_structure(T* TKE,								// node: [C]
		T* u_TKE, T* v_TKE, T* w_TKE,						// node: [C]
		T* u_TKE_share, T* v_TKE_share, T* w_TKE_share,		// node: [C]

		const T* const U2,		// node: [C]
		const T* const V2,		// node: [C]
		const T* const W2,		// node: [C]
		const T* const U,		// node: [C]
		const T* const V,		// node: [C]
		const T* const W,		// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE anisotropy
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void TKE_anisotropy(
		T* TKE_aniso_uu, T* TKE_aniso_vv, T* TKE_aniso_ww,	// node: [C]
		T* TKE_aniso_uv, T* TKE_aniso_uw, T* TKE_aniso_vw,	// node: [C]

		const T* const TKE,			// node: [C]
		const T* const u_TKE,		// node: [C]
		const T* const v_TKE,		// node: [C]
		const T* const w_TKE,		// node: [C]
		const T* const uv_flux,		// node: [C]
		const T* const uw_flux,		// node: [W]
		const T* const vw_flux,		// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE production
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void u_TKE_production(T* _u_TKE_production,		// node: [C]

		const T* const UW_bottom,					// node: [W]
		const T* const UW_top,						// node: [W]
		const T* const U,							// node: [C]
		const T* const W,							// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void v_TKE_production(T* _v_TKE_production,		// node: [C]

		const T* const VW_bottom,					// node: [W]
		const T* const VW_top,						// node: [W]
		const T* const V,							// node: [C]
		const T* const W,							// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void w_TKE_production(T* _w_TKE_production,		// node: [C]

		const T* const W2_c,						// node: [C]
		const T* const W2_w,						// node: [W]
		const T* const W,							// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //


	// TKE transport
	// -------------------------------------------------------------------------------------------- //
	template< typename T >	// -1/2 * d[u'u'w']/dz
	void u_TKE_transport(T* _u_TKE_transport,	// node: [C]

		const T* const uu_w_flux,				// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >	// -1/2 * d[v'v'w']/dz
	void v_TKE_transport(T* _v_TKE_transport,	// node: [C]

		const T* const vv_w_flux,				// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >	// -1/2 * d[w'w'w']/dz
	void w_TKE_transport(T* _w_TKE_transport,	// node: [C]

		const T* const ww_w_flux,				// node: [C]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE pressure work
	// -------------------------------------------------------------------------------------------- //
	template< typename T >	// - d[p'w']/dz
	void w_TKE_pressure_work(T* _w_TKE_pressure_work,	// node: [C]

		const T* const pw_flux,							// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE exchange
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void w_TKE_exchange(T* _w_TKE_energy_exchange,	// node: [C]
		
		const T* const PSww,						// node: [C]
		const T* const Pressure,					// node: [C]
		const T* const W,							// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE dissipation
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void u_TKE_dissipation(T* _u_TKE_dissipation,	// node: [C]

		const T* const U_dissipation,				// node: [C]
		const T* const U,							// node: [C]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);

	template< typename T >
	void v_TKE_dissipation(T* _v_TKE_dissipation,	// node: [C]

		const T* const V_dissipation,				// node: [C]
		const T* const V,							// node: [C]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);

	template< typename T >
	void w_TKE_dissipation(T* _w_TKE_dissipation,	// node: [C]

		const T* const W_dissipation,				// node: [W]
		const T* const W,							// node: [W]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE iso dissipation
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void u_TKE_iso_dissipation(T* _u_TKE_iso_dissipation,	// node: [C]

		const T* const U_iso_dissipation,					// node: [C]
		const T* const U,									// node: [C]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);

	template< typename T >
	void v_TKE_iso_dissipation(T* _v_TKE_iso_dissipation,	// node: [C]

		const T* const V_iso_dissipation,					// node: [C]
		const T* const V,									// node: [C]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);

	template< typename T >
	void w_TKE_iso_dissipation(T* _w_TKE_iso_dissipation,	// node: [C]

		const T* const W_iso_dissipation,					// node: [W]
		const T* const W,									// node: [W]
		const T c_kinematic_viscosity, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// TKE heat flux
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void w_TKE_heat_flux(T* _w_TKE_heat_flux,	// node: [C]

		const T* const Tw_flux,					// node: [W]
		const T c_Richardson, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //


	// ui'uj' flux budget: production
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uv_production_shearU(T* _uv_production_shearU,		// node: [C]

		const T* const VW_bottom_uv,		// node: [~W-C]
		const T* const VW_top_uv,			// node: [~W-C]
		const T* const U,					// node: [C]
		const T* const V,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void uv_production_shearV(T* _uv_production_shearV,		// node: [C]

		const T* const UW_bottom_uv,		// node: [~W-C]
		const T* const UW_top_uv,			// node: [~W-C]
		const T* const U,					// node: [C]
		const T* const V,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);


	template< typename T >
	void uw_production_shearU(T* _uw_production_shearU,		// node: [W]

		const T* const W2_u,				// node: [C]
		const T* const W2_uw,				// node: [W]
		const T* const U,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void uw_production_shearW(T* _uw_production_shearW,		// node: [W]

		const T* const UW_bottom_uw,		// node: [~C-W]
		const T* const UW_top_uw,			// node: [~C-W]
		const T* const U,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);


	template< typename T >
	void vw_production_shearV(T* _vw_production_shearV,		// node: [W]

		const T* const W2_v,				// node: [C]
		const T* const W2_vw,				// node: [W]
		const T* const V,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_production_shearW(T* _vw_production_shearW,		// node: [W]

		const T* const VW_bottom_vw,		// node: [~C-W]
		const T* const VW_top_vw,			// node: [~C-W]
		const T* const V,					// node: [C]
		const T* const W,					// node: [W]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: transport
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uv_transport(T* _uv_transport,		// node: [C]

		const T* const uvw_flux,			// node: [W]
		const wstGrid3d< T >& grid);

	template< typename T >
	void uw_transport(T* _uw_transport,		// node: [W]

		const T* const uww_flux,			// node: [C]
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_transport(T* _vw_transport,		// node: [W]

		const T* const vww_flux,			// node: [C]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: pressure-strain correlations: 2 * p' * S'ij, i != j
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uw_pressure_strain(
		T* P2Suw_turb,		// node: [W]
		T* P2Suw_turb_c,	// node: [C] (shifting [W] -> [C])

		const T* const P2Suw,		// node: [W]
		const T* const Pressure,	// node: [C]
		const T* const U,			// node: [C]
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_pressure_strain(
		T* P2Svw_turb,		// node: [W]
		T* P2Svw_turb_c,	// node: [C] (shifting [W] -> [C])

		const T* const P2Svw,		// node: [C]
		const T* const Pressure,	// node: [C]
		const T* const V,			// node: [C]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: pressure work
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uw_pressure_work(T* _uw_pressure_work,		// node: [W]

		const T* const pu_flux,						// node: [C]
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_pressure_work(T* _vw_pressure_work,		// node: [W]

		const T* const pv_flux,						// node: [C]
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: dissipation
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uv_dissipation(T* _uv_dissipation,		// node: [C]

		const T* const UV_dissipation,			// node: [C]
		const T* const U,						// node: [C]
		const T* const V,						// node: [C]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);

	template< typename T >
	void uw_dissipation(T* _uw_dissipation,		// node: [W]

		const T* const UW_dissipation,			// node: [W]
		const T* const U,						// node: [C]
		const T* const W,						// node: [W]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_dissipation(T* _vw_dissipation,		// node: [W]

		const T* const VW_dissipation,			// node: [W]
		const T* const V,						// node: [C]
		const T* const W,						// node: [W]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: iso dissipation
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uv_iso_dissipation(T* _uv_iso_dissipation,		// node: [C]

		const T* const UV_iso_dissipation,				// node: [C]
		const T* const U,								// node: [C]
		const T* const V,								// node: [C]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);

	template< typename T >
	void uw_iso_dissipation(T* _uw_iso_dissipation,		// node: [W]

		const T* const UW_iso_dissipation,				// node: [W]
		const T* const U,								// node: [C]
		const T* const W,								// node: [W]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);

	template< typename T >
	void vw_iso_dissipation(T* _vw_iso_dissipation,		// node: [W]

		const T* const VW_iso_dissipation,				// node: [W]
		const T* const V,								// node: [C]
		const T* const W,								// node: [W]
		const T c_kinematic_viscosity,
		const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //

	// ui'uj' flux budget: buoyancy
	// -------------------------------------------------------------------------------------------- //
	template< typename T >
	void uw_buoyancy(T* _uw_buoyancy,	// node: [W]

		const T* const CU_uw,			// node: [W]
		const T* const C,				// node: [C]
		const T* const U,				// node: [C]
		const T c_Richardson, const wstGrid3d< T >& grid);

	template< typename T >
	void vw_buoyancy(T* _vw_buoyancy,	// node: [W]

		const T* const CV_vw,			// node: [W]
		const T* const C,				// node: [C]
		const T* const V,				// node: [C]
		const T c_Richardson, const wstGrid3d< T >& grid);
	// -------------------------------------------------------------------------------------------- //
}


// Friction velocity (get)
// -------------------------------------------------------------------------------------------- //
template< typename T >
T nse::dynamic_velocity(T* U_z,
	const T Umax, const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
{
	T u_bottom = (T)0, u_top = (T)0;

	if (grid.mpi_com.rank_z == 0)
		u_bottom = sqrt((T) 2.0 * c_kinematic_viscosity *
		fabs(U_z[grid.gcz] + (T) 0.5 * Umax) * (T) 2.0 * grid.dzmi[grid.gcz]);

	if (grid.mpi_com.rank_z == grid.mpi_com.size_z - 1)
		u_top = sqrt((T) 2.0 * c_kinematic_viscosity *
		fabs((T) 0.5 * Umax - U_z[grid.nz - grid.gcz - 1]) * (T) 2.0 * grid.dzpi[grid.nz - grid.gcz - 1]);

	mpi_allreduce(&u_bottom, &u_top, MPI_MAX, grid.mpi_com.comm);

	return (T) 0.5 * (u_bottom + u_top);
}
// -------------------------------------------------------------------------------------------- //

// Gradients
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::c_gradient_z(
	T* Grad,					// node: [W]

	const T* const C,			// node: [C]
	const nse_const3d::axisType axis, const wstGrid3d< T >& grid)
	//  __
	//  dC
	//  --
	//	dz
	//
	// [C] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	if (axis == nse_const3d::axisZ) {

		int k;
#pragma omp parallel for private(k) shared(Grad)
		for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
			Grad[k] = (C[k] - C[k - 1]) * (T) 2.0 * grid.dzmi[k];
		}
		return;
	}
	if (axis == nse_const3d::axisYZ) {

		int j, k, idx;
#pragma omp parallel for private(j, k, idx) shared(Grad)
		for (j = grid.gcy; j < grid.ny - grid.gcy; j++)
			for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
				idx = j * grid.nz + k;
				Grad[idx] = (C[idx] - C[idx - 1]) * (T) 2.0 * grid.dzmi[k];
			}
		return;
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::w_gradient_z(
	T* Grad,					// node: [C]

	const T* const W,			// node: [W]
	const nse_const3d::axisType axis, const wstGrid3d< T >& grid)
	//  __
	//  dW
	//  --
	//	dz
	//
	// [W] average has to be known at all [W] nodes, including walls
{
	if (axis == nse_const3d::axisZ) {

		int k;
#pragma omp parallel for private(k) shared(Grad)
		for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
			Grad[k] = (W[k + 1] - W[k]) * grid.dzi[k];
		}
		return;
	}
	if (axis == nse_const3d::axisYZ) {

		int j, k, idx;
#pragma omp parallel for private(j, k, idx) shared(Grad)
		for (j = grid.gcy; j < grid.ny - grid.gcy; j++)
			for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
				idx = j * grid.nz + k;
				Grad[idx] = (W[idx + 1] - W[idx]) * grid.dzi[k];
			}
		return;
	}
}
// -------------------------------------------------------------------------------------------- //

// Momentum eq. balance
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::momentum_eq(
	T* momentum_balance,			// node: [W]
	T* turbulent_momentum_flux,		// node: [W]
	T* viscous_stress,				// node: [W]

	const T* const uw_flux,			// node: [W]
	const T* const U_grad,			// node: [W]
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// integrated [U] averaged momentum equation
	//               _
	//    ____   1  dU
	//	- u'w' + -- -- = (u*)^2 
	//			 Re dz
	//    (1)      (2)
	// (1) - turbulent momentum flux
	// (2) - viscous stress
	// terms are defined at [UW] nodes (averaged in [W] nodes)
	//
	// [u'w', dU/dz] have to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(momentum_balance, \
	turbulent_momentum_flux, viscous_stress)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) { // all [W] nodes
		turbulent_momentum_flux[k] = -uw_flux[k];
		viscous_stress[k] = c_kinematic_viscosity * U_grad[k];

		momentum_balance[k] = turbulent_momentum_flux[k] + viscous_stress[k];
	}
}
// -------------------------------------------------------------------------------------------- //

// TKE structure
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::TKE_structure(
	T* TKE,												// node: [C]
	T* u_TKE, T* v_TKE, T* w_TKE,						// node: [C]
	T* u_TKE_share, T* v_TKE_share, T* w_TKE_share,		// node: [C]
	
	const T* const U2,		// node: [C]
	const T* const V2,		// node: [C]
	const T* const W2,		// node: [C]
	const T* const U,		// node: [C]
	const T* const V,		// node: [C]
	const T* const W,		// node: [W]
	const wstGrid3d< T >& grid)
	//
	// 1/2*u'[i]*u'[i]
	//
	// [W] average has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(TKE, u_TKE, v_TKE, w_TKE, \
	u_TKE_share, v_TKE_share, w_TKE_share)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		u_TKE[k] = (T)0.5 * (U2[k] - U[k] * U[k]);
		v_TKE[k] = (T)0.5 * (V2[k] - V[k] * V[k]);
		w_TKE[k] = (T)0.5 * (W2[k] - W[k] * W[k + 1]);

		TKE[k] = u_TKE[k] + v_TKE[k] + w_TKE[k];

		u_TKE_share[k] = u_TKE[k] / TKE[k];
		v_TKE_share[k] = v_TKE[k] / TKE[k];
		w_TKE_share[k] = w_TKE[k] / TKE[k];
	}
}
// -------------------------------------------------------------------------------------------- //

// TKE anisotropy
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::TKE_anisotropy(
	T* TKE_aniso_uu, T* TKE_aniso_vv, T* TKE_aniso_ww,		// node: [C]
	T* TKE_aniso_uv, T* TKE_aniso_uw, T* TKE_aniso_vw,		// node: [C]

	const T* const TKE,		// node: [C]
	const T* const u_TKE,	// node: [C]
	const T* const v_TKE,	// node: [C]
	const T* const w_TKE,	// node: [C]
	const T* const uv_flux,	// node: [C]
	const T* const uw_flux,	// node: [W]
	const T* const vw_flux,	// node: [W]

	const wstGrid3d< T >& grid)
	//
	// TKE anisotropy symmetric zero-trace tensor:
	// [(u(i)u(j)) / (u(k)u(k))] - 1/3 * delta(i,j)
	//
	// [u'w', v'w'] fluxes have to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(TKE_aniso_uu, TKE_aniso_vv, TKE_aniso_ww, \
	TKE_aniso_uv, TKE_aniso_uw, TKE_aniso_vw)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {

		TKE_aniso_uu[k] = (u_TKE[k] / TKE[k]) - ((T)1.0 / (T)3.0);
		TKE_aniso_vv[k] = (v_TKE[k] / TKE[k]) - ((T)1.0 / (T)3.0);
		TKE_aniso_ww[k] = (w_TKE[k] / TKE[k]) - ((T)1.0 / (T)3.0);

		TKE_aniso_uv[k] = (uv_flux[k] / ((T)2.0*TKE[k]));
		TKE_aniso_uw[k] = ((uw_flux[k] + uw_flux[k + 1]) / ((T)4.0*TKE[k]));
		TKE_aniso_vw[k] = ((vw_flux[k] + vw_flux[k + 1]) / ((T)4.0*TKE[k]));
	}
}
// -------------------------------------------------------------------------------------------- //

// TKE production
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::u_TKE_production(
	T* _u_TKE_production,		// node: [C]

	const T* const UW_bottom,	// node: [W]
	const T* const UW_top,		// node: [W]
	const T* const U,			// node: [C]
	const T* const W,			// node: [W]
	const wstGrid3d< T >& grid)
	//
	// [U] component of T.K.E. equation defined at [C] node via interpolation
	//            _
	//    ____   dU
	//	- u'w' * -- 
	//			 dz
	// [W] average has to be known at all [W] nodes, including walls
	// [U] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_u_TKE_production)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_u_TKE_production[k] = -(T) 0.5 * (
			// discretization is based on ADV. form //
			// correct for SKEW.form (checked) //
			(UW_bottom[k] - U[k] * W[k]) * (U[k] - U[k - 1]) * grid.dzi[k] +
			(UW_top[k] - U[k] * W[k + 1]) * (U[k + 1] - U[k]) * grid.dzi[k]);
	}
}

template< typename T >
void nse::v_TKE_production(
	T* _v_TKE_production,		// node: [C]

	const T* const VW_bottom,	// node: [W]
	const T* const VW_top,		// node: [W]
	const T* const V,			// node: [C]
	const T* const W,			// node: [W]
	const wstGrid3d< T >& grid)
	//
	// [V] component of T.K.E. equation defined at [C] node via interpolation
	//            _
	//    ____   dV
	//	- v'w' * -- 
	//			 dz
	// [W] average has to be known at all [W] nodes, including walls
	// [V] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_v_TKE_production)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_v_TKE_production[k] = -(T) 0.5 * (
			// discretization is based on ADV. form //
			// correct for SKEW.form (checked) //
			(VW_bottom[k] - V[k] * W[k]) * (V[k] - V[k - 1]) * grid.dzi[k] +
			(VW_top[k] - V[k] * W[k + 1]) * (V[k + 1] - V[k]) * grid.dzi[k]);
	}
}
template< typename T >
void nse::w_TKE_production(
	T* _w_TKE_production,		// node: [C]

	const T* const W2_c,		// node: [C]
	const T* const W2_w,		// node: [W]
	const T* const W,			// node: [W]
	const wstGrid3d< T >& grid)
	//
	// [W] component of T.K.E. equation defined at [C] node via interpolation
	//            _
	//    ____   dW
	//	- w'w' * -- 
	//			 dz
	// [W] average has to be known at all [W] nodes, including walls
{
	T* w_production;
	int buf_id = memStx::get_buf(&w_production, grid.nz);

	int k;

	// computing at [W] nodes:
#pragma omp parallel for private(k) shared(w_production)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		w_production[k] = -(
			(W2_w[k] + W2_c[k] - W[k] * W[k] - W[k] * W[k + 1]) * (W[k + 1] - W[k]) * grid.dzmih[k] +
			(W2_w[k] + W2_c[k - 1] - W[k] * W[k] - W[k] * W[k - 1]) * (W[k] - W[k - 1]) * grid.dzmih[k]);
	}

	// setting boundary conditions:
	w_dirichlet_bc_z(w_production, (T)0, (T)0, grid);

	// interpolation [W]->[C]:
#pragma omp parallel for private(k) shared(_w_TKE_production, w_production)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_production[k] =
			(T) 0.5 * (w_production[k] + w_production[k + 1]);
	}

	memStx::free_buf(buf_id);
}
// -------------------------------------------------------------------------------------------- //

// TKE transport
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::u_TKE_transport(
	T* _u_TKE_transport,		// node: [C]

	const T* const uu_w_flux,	// node: [W]
	const wstGrid3d< T >& grid)
	// [U] transport term of T.K.E. equation defined at [C] node via interpolation
	//       _____________________
	//      d[(1/2) * (u')^2 * w'] 
	//	-  ------------------------ 
	//				dz
	// [u'u'w'] flux has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_u_TKE_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_u_TKE_transport[k] = -(T) 0.5 *
			(uu_w_flux[k + 1] - uu_w_flux[k]) * grid.dzi[k];
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::v_TKE_transport(
	T* _v_TKE_transport,		// node: [C]

	const T* const vv_w_flux,	// node: [W]
	const wstGrid3d< T >& grid)
	// [V] transport term of T.K.E. equation defined at [C] node via interpolation
	//       _____________________
	//      d[(1/2) * (v')^2 * w'] 
	//	-  ------------------------ 
	//				dz
	// [v'v'w'] flux has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_v_TKE_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_v_TKE_transport[k] = -(T) 0.5 *
			(vv_w_flux[k + 1] - vv_w_flux[k]) * grid.dzi[k];
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::w_TKE_transport(
	T* _w_TKE_transport,		// node: [C]

	const T* const ww_w_flux,	// node: [C]
	const wstGrid3d< T >& grid)
	// [W] transport component of T.K.E. equation defined at [C] node via interpolation
	//       _____________________
	//      d[(1/2) * (w')^2 * w'] 
	//	-  ------------------------ 
	//				dz
	// [w'w'w'] flux has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_w_TKE_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_transport[k] = -(T) 0.5 * (
			(ww_w_flux[k + 1] - ww_w_flux[k]) * grid.dzpi[k] +
			(ww_w_flux[k] - ww_w_flux[k - 1]) * grid.dzmi[k]);
	}
}
// -------------------------------------------------------------------------------------------- //


// TKE pressure work
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::w_TKE_pressure_work(
	T* _w_TKE_pressure_work,	// node: [C]

	const T* const pw_flux,		// node: [W]
	const wstGrid3d< T >& grid)
	// [W] pressure work component of T.K.E. equation defined at [C] node via interpolation
	//       ______
	//      d[p'w'] 
	//	-  -------- 
	//		  dz
	// [p'w'] flux has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_w_TKE_pressure_work)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_pressure_work[k] = -(pw_flux[k + 1] - pw_flux[k]) * grid.dzi[k];
	}
}
// -------------------------------------------------------------------------------------------- //

// TKE exchange
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::w_TKE_exchange(
	T* _w_TKE_exchange,			// node: [C]

	const T* const PSww,		// node: [C]
	const T* const Pressure,	// node: [C]
	const T* const W,			// node: [W]
	const wstGrid3d< T >& grid)
	// [W] energy exchange component of T.K.E. equation defined at [C] node via interpolation
	// ______
	//   dw'
	// p'---
	//   dz
	// [W] average has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_w_TKE_exchange)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_exchange[k] = PSww[k] - Pressure[k] * ((W[k + 1] - W[k]) * grid.dzi[k]);
	}
}
// -------------------------------------------------------------------------------------------- //

// TKE dissipation
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::u_TKE_dissipation(
	T* _u_TKE_dissipation,			// node: [C]

	const T* const U_dissipation,	// node: [C]
	const T* const U,				// node: [C]
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [U] dissipation component of T.K.E. defined at [C] node via interpolation
	//       ___________
	//   1      d^2(u')
	//	-- * u' ------- 
	//	Re		dx(j)^2
	// [U] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_u_TKE_dissipation)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) { 
		_u_TKE_dissipation[k] = U_dissipation[k] -
			c_kinematic_viscosity * (U[k] *
			((U[k + 1] - U[k]) * grid.dzp2i[k] - (U[k] - U[k - 1]) * grid.dzm2i[k]));
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::v_TKE_dissipation(
	T* _v_TKE_dissipation,			// node: [C]

	const T* const V_dissipation,	// node: [C]
	const T* const V,				// node: [C]
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [V] dissipation component of T.K.E. defined at [C] node via interpolation
	//       ___________
	//   1      d^2(v')
	//	-- * v' ------- 
	//	Re		dx(j)^2 
	// [V] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_v_TKE_dissipation)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_v_TKE_dissipation[k] = V_dissipation[k] -
			c_kinematic_viscosity * (V[k] *
			((V[k + 1] - V[k]) * grid.dzp2i[k] - (V[k] - V[k - 1]) * grid.dzm2i[k]));
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::w_TKE_dissipation(
	T* _w_TKE_dissipation,			// node: [C]

	const T* const W_dissipation,	// node: [W]
	const T* const W,				// node: [W]
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [W] dissipation component of T.K.E. defined at [C] node via interpolation
	//       ___________
	//   1      d^2(w')
	//	-- * w' ------- 
	//	Re		dx(j)^2 
	// [W] average has to be known at all [W] nodes, including walls
	// *Note:
	//		computing dissipation at [W] nodes, setting 
	//		boundary conditions for turbulence dissipation (assuming = 0 at walls)
	//		and interpolating to [C] node
{
	T* w_diss;
	int buf_id = memStx::get_buf(&w_diss, grid.nz);

	int k;

	// computing at [W] nodes:
#pragma omp parallel for private(k) shared(w_diss)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		w_diss[k] = W_dissipation[k] -
			c_kinematic_viscosity * (W[k] *
			((W[k + 1] - W[k]) * grid.dzm2i[k] - (W[k] - W[k - 1]) * grid.dzp2i[k - 1]));
	}

	// setting boundary conditions:
	w_dirichlet_bc_z(w_diss, (T)0, (T)0, grid);

	// interpolation [W]->[C]:
#pragma omp parallel for private(k) shared(_w_TKE_dissipation, w_diss)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_dissipation[k] = 
			(T) 0.5 * (w_diss[k] + w_diss[k + 1]);
	}

	memStx::free_buf(buf_id);
}
// -------------------------------------------------------------------------------------------- //

// TKE iso dissipation
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::u_TKE_iso_dissipation(
	T* _u_TKE_iso_dissipation,			// node: [C] node

	const T* const U_iso_dissipation,	// node: [C] node
	const T* const U,					// node: [C] node
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [U] isotropic dissipation component of T.K.E. equation defined at [C] node via interpolation
	//       _____________
	//   1   d(u')   d(u')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	// [U] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_u_TKE_iso_dissipation)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_u_TKE_iso_dissipation[k] = -(U_iso_dissipation[k] -
			c_kinematic_viscosity * (
			(T)0.5 * (U[k + 1] - U[k]) * (U[k + 1] - U[k]) * grid.dzp2i[k] +
			(T)0.5 * (U[k] - U[k - 1]) * (U[k] - U[k - 1]) * grid.dzm2i[k]));
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::v_TKE_iso_dissipation(
	T* _v_TKE_iso_dissipation,			// node: [C] node

	const T* const V_iso_dissipation,	// node: [C] node
	const T* const V,					// node: [C] node
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [V] isotropic dissipation component of T.K.E. equation defined at [C] node via interpolation
	//       _____________
	//   1   d(v')   d(v')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	// [V] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_v_TKE_iso_dissipation)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_v_TKE_iso_dissipation[k] = -(V_iso_dissipation[k] -
			c_kinematic_viscosity * (
			(T)0.5 * (V[k + 1] - V[k]) * (V[k + 1] - V[k]) * grid.dzp2i[k] +
			(T)0.5 * (V[k] - V[k - 1]) * (V[k] - V[k - 1]) * grid.dzm2i[k]));
	}
}
// -------------------------------------------------------------------------------------------- //

template< typename T >
void nse::w_TKE_iso_dissipation(
	T* _w_TKE_iso_dissipation,			// node: [C]

	const T* const W_iso_dissipation,	// node: [W]
	const T* const W,					// node: [W]
	const T c_kinematic_viscosity, const wstGrid3d< T >& grid)
	// [W] isotropic dissipation component of T.K.E. equation defined at [C] node via interpolation
	//       _____________
	//   1   d(w')   d(w')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	// [W] average has to be known at all [W] nodes, including walls
	// *Note:
	//		computing isotropic dissipation at [W] nodes, setting 
	//		boundary conditions for turbulence dissipation (assuming = 0 at walls)
	//		and interpolating to [C] node
{
	T* w_iso_diss;
	int buf_id = memStx::get_buf(&w_iso_diss, grid.nz);

	int k;

	// computing at [W] nodes:
#pragma omp parallel for private(k) shared(w_iso_diss)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		w_iso_diss[k] = -(W_iso_dissipation[k] -
			c_kinematic_viscosity * (
			(T)0.5 * (W[k + 1] - W[k]) * (W[k + 1] - W[k]) * grid.dzm2i[k] +
			(T)0.5 * (W[k] - W[k - 1]) * (W[k] - W[k - 1]) * grid.dzp2i[k - 1]));
	}

	// setting boundary conditions:
	w_dirichlet_bc_z(w_iso_diss, (T)0, (T)0, grid);

	// interpolation [W]->[C]:
#pragma omp parallel for private(k) shared(_w_TKE_iso_dissipation, w_iso_diss)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_w_TKE_iso_dissipation[k] =
			(T) 0.5 * (w_iso_diss[k] + w_iso_diss[k + 1]);
	}

	memStx::free_buf(buf_id);
}
// -------------------------------------------------------------------------------------------- //

// TKE heat flux
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::w_TKE_heat_flux(
	T* _w_TKE_heat_flux,		// node: [C]

	const T* const Tw_flux,		// node: [W]
	const T c_Richardson, const wstGrid3d< T >& grid)
	// [W] heat flux component of T.K.E. equation defined at [C] node via interpolation
	//        ____
	//   Ri * w'T'
	//	
	// [T'W'] flux has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_w_TKE_heat_flux)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++)
		_w_TKE_heat_flux[k] = (T) 0.5 * c_Richardson * (Tw_flux[k] + Tw_flux[k + 1]);
}
// -------------------------------------------------------------------------------------------- //


// ui'uj' flux budget: production
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uv_production_shearU(T* _uv_production_shearU,		// node: [C]

	const T* const VW_bottom_uv,		// node: [~W-C]
	const T* const VW_top_uv,			// node: [~W-C]
	const T* const U,					// node: [C]
	const T* const V,					// node: [C]
	const T* const W,					// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradU] term of [u'v'] budget equation defined at [C] node
	//           _
	//   ____   dU
	// - v'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
	// [U] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uv_production_shearU)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {

		_uv_production_shearU[k] =
			-(VW_top_uv[k] - V[k] * W[k + 1]) * (U[k + 1] - U[k]) * grid.dzih[k]
			- (VW_bottom_uv[k] - V[k] * W[k]) * (U[k] - U[k - 1]) * grid.dzih[k];
	}
}

template< typename T >
void nse::uv_production_shearV(T* _uv_production_shearV,		// node: [C]

	const T* const UW_bottom_uv,		// node: [~W-C]
	const T* const UW_top_uv,			// node: [~W-C]
	const T* const U,					// node: [C]
	const T* const V,					// node: [C]
	const T* const W,					// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradV] term of [u'v'] budget equation defined at [C] node
	//           _
	//   ____   dV
	// - u'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
	// [V] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uv_production_shearV)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {

		_uv_production_shearV[k] =
			-(UW_top_uv[k] - U[k] * W[k + 1]) * (V[k + 1] - V[k]) * grid.dzih[k]
			- (UW_bottom_uv[k] - U[k] * W[k]) * (V[k] - V[k - 1]) * grid.dzih[k];
	}
}


template< typename T >
void nse::uw_production_shearU(
	T* _uw_production_shearU,		// node: [W]

	const T* const W2_u,			// node: [C]
	const T* const W2_uw,			// node: [W]
	const T* U,						// node: [C]
	const T* W,						// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradU] term of [u'w'] budget equation defined at [W] node
	//           _
	//   ____   dU
	// - w'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
	// [U] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uw_production_shearU)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {	// all [W] nodes, excluding walls

		_uw_production_shearU[k] =
			-(W2_u[k] - W[k] * W[k + 1]) * (U[k + 1] - U[k]) * grid.dziq[k]
			- (W2_u[k - 1] - W[k] * W[k - 1]) * (U[k - 1] - U[k - 2]) * grid.dziq[k - 1]

			- (W2_uw[k] - W[k] * W[k]) * (U[k] - U[k - 1]) * (grid.dziq[k] + grid.dziq[k - 1]);
	}

	w_dirichlet_bc_z(_uw_production_shearU, (T)0, (T)0, grid);
}

template< typename T >
void nse::uw_production_shearW(
	T* _uw_production_shearW,			// node: [W]

	const T* const UW_bottom_uw,		// node: [~C-W]
	const T* const UW_top_uw,			// node: [~C-W]
	const T* const U,					// node: [C]
	const T* const W,					// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradW] term of [u'w'] budget equation defined at [W] node
	//           _
	//   ____   dW
	// - u'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_uw_production_shearW)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {	// all [W] nodes, excluding walls
		_uw_production_shearW[k] = -(
			// discretization is based on ADV. form //
			(UW_bottom_uw[k] - (T)0.25 * (U[k] + U[k - 1]) * (W[k] + W[k - 1])) * (W[k] - W[k - 1]) * grid.dzmi[k] +
			(UW_top_uw[k] - (T)0.25 * (U[k] + U[k - 1]) * (W[k] + W[k + 1])) * (W[k + 1] - W[k]) * grid.dzmi[k]);
	}

	w_dirichlet_bc_z(_uw_production_shearW, (T)0, (T)0, grid);
}


template< typename T >
void nse::vw_production_shearV(
	T* _vw_production_shearV,		// node: [W]

	const T* const W2_v,			// node: [C]
	const T* const W2_vw,			// node: [W]
	const T* V,						// node: [C]
	const T* W,						// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradV] term of [v'w'] budget equation defined at [W] node
	//           _
	//   ____   dV
	// - w'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
	// [V] average has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_vw_production_shearV)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {	// all [W] nodes, excluding walls

		_vw_production_shearV[k] =
			-(W2_v[k] - W[k] * W[k + 1]) * (V[k + 1] - V[k]) * grid.dziq[k]
			- (W2_v[k - 1] - W[k] * W[k - 1]) * (V[k - 1] - V[k - 2]) * grid.dziq[k - 1]

			- (W2_vw[k] - W[k] * W[k]) * (V[k] - V[k - 1]) * (grid.dziq[k] + grid.dziq[k - 1]);
	}

	w_dirichlet_bc_z(_vw_production_shearV, (T)0, (T)0, grid);
}

template< typename T >
void nse::vw_production_shearW(
	T* _vw_production_shearW,			// node: [W]

	const T* const VW_bottom_vw,		// node: [~C-W]
	const T* const VW_top_vw,			// node: [~C-W]
	const T* const V,					// node: [C]
	const T* const W,					// node: [W]
	const wstGrid3d< T >& grid)
	//
	// production [gradW] term of [v'w'] budget equation defined at [W] node
	//           _
	//   ____   dW
	// - v'w' * --
	//          dz
	//
	// [W] average has to be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_vw_production_shearW)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {	// all [W] nodes, excluding walls
		_vw_production_shearW[k] = -(
			// discretization is based on ADV. form //
			(VW_bottom_vw[k] - (T)0.25 * (V[k] + V[k - 1]) * (W[k] + W[k - 1])) * (W[k] - W[k - 1]) * grid.dzmi[k] +
			(VW_top_vw[k] - (T)0.25 * (V[k] + V[k - 1]) * (W[k] + W[k + 1])) * (W[k + 1] - W[k]) * grid.dzmi[k]);
	}

	w_dirichlet_bc_z(_vw_production_shearW, (T)0, (T)0, grid);
}
// -------------------------------------------------------------------------------------------- //

// ui'uj' flux budget: transport
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uv_transport(
	T* _uv_transport,			// node: [C]

	const T* const uvw_flux,	// node: [W]
	const wstGrid3d< T >& grid)
	//
	// transport term of [u'v'] budget equation defined at [C] node
	//       ______________
	//      d[(u'v') * w'] 
	//	-  ---------------- 
	//			 dz
	// [u'v'w'] flux has be known at all [W] nodes, including walls
{
	int k;
#pragma omp parallel for private(k) shared(_uv_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_uv_transport[k] = -(uvw_flux[k + 1] - uvw_flux[k]) * grid.dzi[k];
	}
}


template< typename T >
void nse::uw_transport(
	T* _uw_transport,			// node: [W]

	const T* const uww_flux,	// node: [C]
	const wstGrid3d< T >& grid)
	//
	// transport term of [u'w'] budget equation defined at [W] node
	//       ______________
	//      d[(u'w') * w'] 
	//	-  ---------------- 
	//			 dz
	// [u'w'w'] flux has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uw_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_uw_transport[k] = -(uww_flux[k] - uww_flux[k - 1]) * (T)2.0 * grid.dzmi[k];
	}
}


template< typename T >
void nse::vw_transport(
	T* _vw_transport,			// node: [W]

	const T* const vww_flux,	// node: [C]
	const wstGrid3d< T >& grid)
	//
	// transport term of [v'w'] budget equation defined at [W] node
	//       ______________
	//      d[(v'w') * w'] 
	//	-  ---------------- 
	//			 dz
	// [v'w'w'] flux has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_vw_transport)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_vw_transport[k] = -(vww_flux[k] - vww_flux[k - 1]) * (T)2.0 * grid.dzmi[k];
	}
}
// -------------------------------------------------------------------------------------------- //

// pressure-strain correlations: 2 * p' * S'ij, i != j
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uw_pressure_strain(
	T* P2Suw_turb,				// node: [W]
	T* P2Suw_turb_c,			// node: [C] (shifting [W] -> [C])
	
	const T* const P2Suw,		// node: [W]
	const T* const Pressure,	// node: [C]
	const T* const U,			// node: [C]
	const wstGrid3d< T >& grid)
	//
	// pressure-strain term of [u'w'] budget equation defined at [W] node
	//
	// Qij = 2 * p'S'ij, i != j
	//
	// [U, P] averages have be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
	// [Quw] average has to be known at all [W] nodes including walls
{
	int k;
#pragma omp parallel for private(k) shared(P2Suw_turb)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		P2Suw_turb[k] = P2Suw[k] -
			(Pressure[k] + Pressure[k - 1]) * ((U[k] - U[k - 1]) * grid.dzmi[k]);
	}

	// interpolation [W] -> [C]:
#pragma omp parallel for private(k) shared(P2Suw_turb_c, P2Suw_turb)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		P2Suw_turb_c[k] = (T)0.5 * (P2Suw_turb[k] + P2Suw_turb[k + 1]);
	}
}

template< typename T >
void nse::vw_pressure_strain(
	T* P2Svw_turb,				// node: [W]
	T* P2Svw_turb_c,			// node: [C] (shifting [W] -> [C])

	const T* const P2Svw,		// node: [W]
	const T* const Pressure,	// node: [C]
	const T* const V,			// node: [C]
	const wstGrid3d< T >& grid)
	//
	// pressure-strain term of [v'w'] budget equation defined at [W] node
	//
	// Qij = 2 * p'S'ij, i != j
	//
	// [V, P] averages have be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
	// [Qvw] average has to be known at all [W] nodes including walls
{
	int k;
#pragma omp parallel for private(k) shared(P2Svw_turb)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		P2Svw_turb[k] = P2Svw[k] -
			(Pressure[k] + Pressure[k - 1]) * ((V[k] - V[k - 1]) * grid.dzmi[k]);
	}

	// interpolation [W] -> [C]:
#pragma omp parallel for private(k) shared(P2Svw_turb_c, P2Svw_turb)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		P2Svw_turb_c[k] = (T)0.5 * (P2Svw_turb[k] + P2Svw_turb[k + 1]);
	}
}
// -------------------------------------------------------------------------------------------- //

// ui'uj' flux budget: pressure work
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uw_pressure_work(
	T* _uw_pressure_work,		// node: [W]

	const T* const pu_flux,		// node: [C]
	const wstGrid3d< T >& grid)
	//
	// pressure work term of [u'w'] budget equation defined at [W] node
	//    ____
	//  d(p'u')
	// --------
	//    dz
	//
	// [p'u'] flux has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uw_pressure_work)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_uw_pressure_work[k] = -(pu_flux[k] - pu_flux[k - 1]) * (T)2.0 * grid.dzmi[k];
	}
}


template< typename T >
void nse::vw_pressure_work(
	T* _vw_pressure_work,		// node: [W]

	const T* const pv_flux,		// node: [C]
	const wstGrid3d< T >& grid)
	//
	// pressure work term of [v'w'] budget equation defined at [W] node
	//    ____
	//  d(p'v')
	// --------
	//    dz
	//
	// [p'v'] flux has to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_vw_pressure_work)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_vw_pressure_work[k] = -(pv_flux[k] - pv_flux[k - 1]) * (T)2.0 * grid.dzmi[k];
	}
}
// -------------------------------------------------------------------------------------------- //

// ui'uj' flux budget: dissipation
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uv_dissipation(
	T* _uv_dissipation,					// node: [C]

	const T* const UV_dissipation,		// node: [C]
	const T* const U,					// node: [C]
	const T* const V,					// node: [C]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// dissipation term of [u'v'] budget equation defined at [C] node
	//       ___________       ___________
	//          d^2(u')           d^2(v')
	//	nu * v' ------- + nu * u' ------- 
	//			dx(j)^2           dx(j)^2
	//
	// [U, V] average have be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T U_diffusion, V_diffusion;
	int k;

#pragma omp parallel for private(k, U_diffusion, V_diffusion) shared(_uv_dissipation)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {

		U_diffusion = c_kinematic_viscosity *
			((U[k + 1] - U[k]) * grid.dzp2i[k] - (U[k] - U[k - 1]) * grid.dzm2i[k]);
		V_diffusion = c_kinematic_viscosity *
			((V[k + 1] - V[k]) * grid.dzp2i[k] - (V[k] - V[k - 1]) * grid.dzm2i[k]);

		_uv_dissipation[k] = UV_dissipation[k] -
			U[k] * V_diffusion - V[k] * U_diffusion;
	}
}


template< typename T >
void nse::uw_dissipation(
	T* _uw_dissipation,					// node: [W]

	const T* const UW_dissipation,		// node: [W]
	const T* const U,					// node: [C]
	const T* const W,					// node: [W]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// dissipation term of [u'w'] budget equation defined at [W] node
	//       ___________       ___________
	//          d^2(u')           d^2(w')
	//	nu * w' ------- + nu * u' ------- 
	//			dx(j)^2           dx(j)^2
	//
	// [UW-dissipation, W] averages have to be known at all [W] nodes, including walls
	// [U] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T *U_diffusion, *W_diffusion;
	int u_buf_id = memStx::get_buf(&U_diffusion, grid.nz);
	int w_buf_id = memStx::get_buf(&W_diffusion, grid.nz);

	null(U_diffusion, grid.nz);
	null(W_diffusion, grid.nz);

	int k;

#pragma omp parallel for private(k) shared(U_diffusion, W_diffusion)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		U_diffusion[k] = c_kinematic_viscosity *
			((U[k + 1] - U[k]) * grid.dzp2i[k] - (U[k] - U[k - 1]) * grid.dzm2i[k]);
		W_diffusion[k] = c_kinematic_viscosity * 
			((W[k + 1] - W[k]) * grid.dzm2i[k] - (W[k] - W[k - 1]) * grid.dzp2i[k - 1]);
	}

	// assuming W = 0 & ~linear[laplace(W)] = 0 at walls -> no need for laplace(U) b.c.
	//
	w_dirichlet_bc_z(W_diffusion, (T)0, (T)0, grid);

#pragma omp parallel for private(k) shared(_uw_dissipation,\
	U_diffusion, W_diffusion)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_uw_dissipation[k] = UW_dissipation[k] -
			(T)0.5 * (
			(U[k] + U[k - 1]) * W_diffusion[k] +
			W[k] * (U_diffusion[k] + U_diffusion[k - 1]));
	}

	memStx::free_buf(u_buf_id);
	memStx::free_buf(w_buf_id);
}


template< typename T >
void nse::vw_dissipation(
	T* _vw_dissipation,					// node: [W]

	const T* const VW_dissipation,		// node: [W]
	const T* const V,					// node: [C]
	const T* const W,					// node: [W]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// dissipation term of [v'w'] budget equation defined at [W] node
	//       ___________       ___________
	//          d^2(v')           d^2(w')
	//	nu * w' ------- + nu * v' ------- 
	//			dx(j)^2           dx(j)^2
	//
	// [VW-dissipation, W] averages have to be known at all [W] nodes, including walls
	// [V] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T *V_diffusion, *W_diffusion;
	int v_buf_id = memStx::get_buf(&V_diffusion, grid.nz);
	int w_buf_id = memStx::get_buf(&W_diffusion, grid.nz);

	null(V_diffusion, grid.nz);
	null(W_diffusion, grid.nz);

	int k;

#pragma omp parallel for private(k) shared(V_diffusion, W_diffusion)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		V_diffusion[k] = c_kinematic_viscosity *
			((V[k + 1] - V[k]) * grid.dzp2i[k] - (V[k] - V[k - 1]) * grid.dzm2i[k]);
		W_diffusion[k] = c_kinematic_viscosity *
			((W[k + 1] - W[k]) * grid.dzm2i[k] - (W[k] - W[k - 1]) * grid.dzp2i[k - 1]);
	}

	// assuming W = 0 & ~linear[laplace(W)] = 0 at walls -> no need for laplace(U) b.c.
	//
	w_dirichlet_bc_z(W_diffusion, (T)0, (T)0, grid);

#pragma omp parallel for private(k) shared(_vw_dissipation,\
	V_diffusion, W_diffusion)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_vw_dissipation[k] = VW_dissipation[k] -
			(T)0.5 * (
			(V[k] + V[k - 1]) * W_diffusion[k] +
			W[k] * (V_diffusion[k] + V_diffusion[k - 1]));
	}

	memStx::free_buf(v_buf_id);
	memStx::free_buf(w_buf_id);
}
// -------------------------------------------------------------------------------------------- //

// ui'uj' flux budget: iso dissipation
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uv_iso_dissipation(
	T* _uv_iso_dissipation,				// node: [C]

	const T* const UV_iso_dissipation,	// node: [C]
	const T* const U,					// node: [C]
	const T* const V,					// node: [C]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// isotropic dissipation term of [u'v'] budget equation defined at [C] node
	//       _____________
	//   2   d(u')   d(v')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	//
	// *Note:
	//		computing isotropic dissipation of average at [W] nodes, including walls,
	//		and interpolating to [C] node
	//
	// [U, V] averages have be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T *avgz;
	int buf_id = memStx::get_buf(&avgz, grid.nz);

	null(avgz, grid.nz);

	int k;

	// computing at [W] nodes
#pragma omp parallel for private(k) shared(avgz)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes, including walls
		avgz[k] = (T)2.0 * c_kinematic_viscosity *
			((U[k] - U[k - 1]) * (T)2.0 * grid.dzmi[k]) *
			((V[k] - V[k - 1]) * (T)2.0 * grid.dzmi[k]);
	}

	// averaging: [W] -> [C]
#pragma omp parallel for private(k) shared(_uv_iso_dissipation, avgz)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		_uv_iso_dissipation[k] = -(UV_iso_dissipation[k] - (T)0.5 * (avgz[k] + avgz[k + 1]));
	}

	memStx::free_buf(buf_id);
}


template< typename T >
void nse::uw_iso_dissipation(
	T* _uw_iso_dissipation,				// node: [W]

	const T* const UW_iso_dissipation,	// node: [W]
	const T* const U,					// node: [C]
	const T* const W,					// node: [W]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// isotropic dissipation term of [u'w'] budget equation defined at [W] node
	//       _____________
	//   2   d(u')   d(w')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	//
	// *Note:
	//		computing isotropic dissipation of average at [C] nodes, setting 
	//		boundary conditions (assuming = 0 at walls)
	//		and interpolating to [W] node
	//
	// [UW-iso-dissipation, W] averages have to be known at all [W] nodes, including walls
	// [U] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T *avgz;
	int buf_id = memStx::get_buf(&avgz, grid.nz);

	null(avgz, grid.nz);

	int k;

#pragma omp parallel for private(k) shared(avgz)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		avgz[k] = (T)2.0 * c_kinematic_viscosity *
			((U[k + 1] - U[k]) * grid.dzpi[k] + (U[k] - U[k - 1]) * grid.dzmi[k]) *
			((W[k + 1] - W[k]) * grid.dzi[k]);
	}

	// assuming W = 0 & ~linear[laplace(W)] = 0 at walls -> dirichlet b.c.
	//
	c_dirichlet_bc_z(avgz, (T)0, (T)0, grid);

#pragma omp parallel for private(k) shared(_uw_iso_dissipation, avgz)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_uw_iso_dissipation[k] = -(UW_iso_dissipation[k] - (T)0.5 * (avgz[k] + avgz[k - 1]));
	}

	memStx::free_buf(buf_id);
}

template< typename T >
void nse::vw_iso_dissipation(
	T* _vw_iso_dissipation,				// node: [W]

	const T* const VW_iso_dissipation,	// node: [W]
	const T* const V,					// node: [C]
	const T* const W,					// node: [W]
	const T c_kinematic_viscosity,
	const wstGrid3d< T >& grid)
	//
	// isotropic dissipation term of [v'w'] budget equation defined at [W] node
	//       _____________
	//   2   d(v')   d(w')
	//-	-- * ----- * -----
	//	Re	 dx(j)   dx(j)
	//
	// *Note:
	//		computing isotropic dissipation of average at [C] nodes, setting 
	//		boundary conditions (assuming = 0 at walls)
	//		and interpolating to [W] node
	//
	// [VW-iso-dissipation, W] averages have to be known at all [W] nodes, including walls
	// [V] average has be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	T *avgz;
	int buf_id = memStx::get_buf(&avgz, grid.nz);

	null(avgz, grid.nz);

	int k;

#pragma omp parallel for private(k) shared(avgz)
	for (k = grid.gcz; k < grid.nz - grid.gcz; k++) {
		avgz[k] = (T)2.0 * c_kinematic_viscosity *
			((V[k + 1] - V[k]) * grid.dzpi[k] + (V[k] - V[k - 1]) * grid.dzmi[k]) *
			((W[k + 1] - W[k]) * grid.dzi[k]);
	}

	// assuming W = 0 & ~linear[laplace(W)] = 0 at walls -> dirichlet b.c.
	//
	c_dirichlet_bc_z(avgz, (T)0, (T)0, grid);

#pragma omp parallel for private(k) shared(_vw_iso_dissipation, avgz)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_vw_iso_dissipation[k] = -(VW_iso_dissipation[k] - (T)0.5 * (avgz[k] + avgz[k - 1]));
	}

	memStx::free_buf(buf_id);
}
// -------------------------------------------------------------------------------------------- //

// ui'uj' flux budget: buoyancy
// -------------------------------------------------------------------------------------------- //
template< typename T >
void nse::uw_buoyancy(
	T* _uw_buoyancy,			// node: [W]

	const T* const CU_uw,		// node: [W]
	const T* const C,			// node: [C]
	const T* const U,			// node: [C]
	const T c_Richardson, const wstGrid3d< T >& grid)
	//
	// buoyancy term of [u'w'] budget equation defined at [W] node
	//      ____
	// Ri * u'c'
	//
	// [CU] average has to be known at all [W] nodes, including walls
	// [C, U] averages have to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_uw_buoyancy)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_uw_buoyancy[k] = c_Richardson * (
			CU_uw[k] - (T)0.25 * (C[k] + C[k - 1]) * (U[k] + U[k - 1]));
	}
}


template< typename T >
void nse::vw_buoyancy(
	T* _vw_buoyancy,			// node: [W]

	const T* const CV_vw,		// node: [W]
	const T* const C,			// node: [C]
	const T* const V,			// node: [C]
	const T c_Richardson, const wstGrid3d< T >& grid)
	//
	// buoyancy term of [v'w'] budget equation defined at [W] node
	//      ____
	// Ri * v'c'
	//
	// [CV] average has to be known at all [W] nodes, including walls
	// [C, V] averages have to be known at all [C] nodes and ghost nodes: (k + 1/2), (k - 1/2)
{
	int k;
#pragma omp parallel for private(k) shared(_vw_buoyancy)
	for (k = grid.gcz; k <= grid.nz - grid.gcz; k++) {	// all [W] nodes
		_vw_buoyancy[k] = c_Richardson * (
			CV_vw[k] - (T)0.25 * (C[k] + C[k - 1]) * (V[k] + V[k - 1]));
	}
}
// -------------------------------------------------------------------------------------------- //