UDocumentation UE5.7 10.02.2026 (Source)
API documentation for Unreal Engine 5.7
Krylov.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2#pragma once
3
4#include "ChaosCheck.h"
5#include "ChaosLog.h"
9#include "Chaos/Vector.h"
10
11namespace Chaos {
12
13template <typename Func, class T>
15 Func multiplyA,
16 TArray<T>& x,
17 const TArray<T>& b,
18 const int max_it,
19 const T res = 1e-4,
20 bool check_residual = false,
21 int min_parallel_batch_size = 1000)
22{
23 //ToDo(ChaosFlesh): take an optional use_list of vertex indices to bypass unused verts
24 auto dotProduct = [](const TArray<T>& x, const TArray<T>& y)
25 {
26 T result = T(0);
27 checkfSlow(x.Num() == y.Num(), TEXT("LanczosCG: trying to take the dot product with vectors of different size."));
28 //PhysicsParallelFor(x.Num(), [&](const int32 i)
29 for (int32 i = 0; i < x.Num(); i++)
30 {
31 result += x[i] * y[i];
32 }/*,
33 x.Num() < min_parallel_batch_size);*/
34 return result;
35 };
36
37 auto AXPY = [min_parallel_batch_size](TArray<T>& y, T a, const TArray<T>& x)
38 {
39 checkfSlow(x.Num() == y.Num(), TEXT("LanczosCG: trying to take a linear combination of vectors with different sizes."));
40 PhysicsParallelFor(x.Num(), [&](const int32 i)
41 {
42 y[i] += a * x[i];
43 },
44 x.Num() < min_parallel_batch_size); // Single threaded for less than min_parallel_batch_size operations.
45 };
46
48 {
49 PhysicsParallelFor(y.Num(), [&](const int32 i)
50 {
51 y[i] *= a;
52 },
53 y.Num() < min_parallel_batch_size); // Single threaded for less than min_parallel_batch_size operations.
54 };
55
56 auto set = [min_parallel_batch_size](TArray<T>& y, const TArray<T>& x)
57 {
58 checkfSlow(x.Num() == y.Num(), TEXT("LanczosCG: trying to set to vectors with different sizes."));
59 PhysicsParallelFor(x.Num(), [&](const int32 i)
60 {
61 y[i] = x[i];
62 },
63 x.Num() < min_parallel_batch_size); // Single threaded for less than 1k operations.
64 };
65
67}
68
69template <class T, typename Func, int32 d=3>
71 Func multiplyA,
73 const TArray<TVector<T, d>>& b,
74 const int max_it,
75 const T res = 1e-4,
76 const TArray<int32>* use_list = nullptr)
77{
78 using TV = TVector<T, d>;
79 auto dot_product = [&use_list](const TArray<TV>& x, const TArray<TV>& y)
80 {
81 checkfSlow(x.Num() == y.Num(), TEXT("Chaos::LanczosCG()::dot_product[]: x and y not sized consistently."));
82 T result = T(0);
83 if (use_list != nullptr)
84 {
85 //PhysicsParallelFor(use_list->Num(), [&](const int32 i)
86 //{
87 for (int32 i = 0; i < use_list->Num(); ++i)
88 {
89 T dot = T(0);
90 int32 j = (*use_list)[i];
91 for (int32 alpha = 0; alpha < d; alpha++)
92 {
93 dot += x[j][alpha] * y[j][alpha];
94 }
95 result += dot;
96 }
97 //},
98 //use_list->Num() < 1000);
99 }
100 else
101 {
102 //PhysicsParallelFor(x.Num(), [&](const int32 i)
103 //{
104 for (int32 i = 0; i < x.Num(); i++)
105 {
106 T dot = T(0);
107 for (size_t alpha = 0; alpha < d; alpha++)
108 {
109 dot += x[i][alpha] * y[i][alpha];
110 }
111 result += dot;
112 }/*,
113 x.Num() < 1000);*/
114 }
115 return result;
116 };
117 auto AXPY = [&use_list](TArray<TV>& y, T a, const TArray<TV>& x)
118 {
119 checkfSlow(x.Num() == y.Num(), TEXT("Chaos::LanczosCG()::AXPY[]: x and y not sized consistently."));
120 if (use_list != nullptr)
121 {
122 PhysicsParallelFor(use_list->Num(), [&](const int32 i)
123 {
124 const int32 j = (*use_list)[i];
125 for (int32 alpha = 0; alpha < d; alpha++)
126 {
127 y[j][alpha] += a * x[j][alpha];
128 }
129 },
130 use_list->Num() < 1000);
131 }
132 else
133 {
134 PhysicsParallelFor(x.Num(), [&](const int32 i)
135 {
136 for (int32 alpha = 0; alpha < d; alpha++)
137 {
138 y[i][alpha] += a * x[i][alpha];
139 }
140 },
141 x.Num() < 1000);
142 }
143 };
144 auto scale = [&use_list](TArray<TV>& y, T a)
145 {
146 if (use_list != nullptr)
147 {
148 PhysicsParallelFor(use_list->Num(), [&](const int32 i)
149 {
150 const int32 j = (*use_list)[i];
151 for (int32 alpha = 0; alpha < d; alpha++)
152 {
153 y[j][alpha] = a * y[j][alpha];
154 }
155 });
156 }
157 else
158 {
159 PhysicsParallelFor(y.Num(), [&](const int32 i)
160 {
161 for (size_t alpha = 0; alpha < d; alpha++)
162 {
163 y[i][alpha] = a * y[i][alpha];
164 }
165 });
166 }
167 };
168 auto set = [&use_list](TArray<TV>& y, const TArray<TV>& x)
169 {
170 if (use_list != nullptr)
171 {
172 PhysicsParallelFor(use_list->Num(), [&](const int32 i)
173 {
174 const int32 j = (*use_list)[i];
175 for (size_t alpha = 0; alpha < d; alpha++)
176 {
177 y[j][alpha] = x[j][alpha];
178 }
179 });
180 }
181 else
182 {
183 PhysicsParallelFor(y.Num(), [&](const int32 i)
184 {
185 for (int32 alpha = 0; alpha < d; alpha++)
186 {
187 y[i][alpha] = x[i][alpha];
188 }
189 });
190 }
191 };
192
194}
195
196template <typename T, typename TV, typename Func1, typename Func2, typename Func3, typename Func4, typename Func5>
199 Func2 dotProduct,
200 Func3 AXPY,
201 Func4 scale,
202 Func5 set,
203 TArray<TV>& x,
204 const TArray<TV>& b,
205 const int max_it,
206 const T res = T(1e-4),
207 bool check_residual = false)
208{
209 // multiplyA = [&A](TV &y, const TV& x): y = A*x;
210 // dotProduct = [](const TV& x, const TV& y): return x^Ty
211 // AXPY = [](Vector& y, T a, const TV& x): y += a*x;
212 // scale = [](Vector& y, T a): y <- a*y
213 // set = [](TV& y, const TV& x): y <- x
214 // Printing out: use
215 // (*((TV*)&b))[i]
216 // ((Vector*)&b)->operator[](i)
217
218 TArray<TV> v; v.SetNum(x.Num());
219 TArray<TV> q; q.SetNum(x.Num());
221 TArray<TV> c; c.SetNum(x.Num());
222
224 if (beta <= res)
225 {
226 UE_LOG(LogChaos, Warning, TEXT("Lanczos residual = %f. Lanczos converged in 0 iterations."), beta)
227 scale(x, T(0));
228 return;
229 }
230 set(q, b);
231 scale(q, T(1) / beta);
232 multiplyA(v, q);
233 T alpha = dotProduct(v, q);
234 T d = alpha;
235 T p = beta / d;
236 set(c, q);
237 set(x, c);
238 scale(x, p);
239 AXPY(v, -alpha, q);
240 T residual{};
241 TArray<TV> y; y.SetNum(x.Num());
242 for (int it = 1; it < max_it; it++)
243 {
245 if (check_residual == true)
246 {
247 multiplyA(y, x);
248 AXPY(y, T(-1), b);
250 if (residual < res)
251 {
252 UE_LOG(LogChaos, VeryVerbose, TEXT("Ax - b residual = %g. Lanczos converged in %d iterations."), residual, it);
253 return;
254 }
255 }
256 else
257 {
258 if (beta < res)
259 {
260 UE_LOG(LogChaos, VeryVerbose, TEXT("Lanczos residual = %g. Lanczos converged in %d iterations."), beta, it);
261 return;
262 }
263 }
264 T mu = beta / d;
265 set(q_old, q);
266 set(q, v);
267 scale(q, T(1) / beta);
268 multiplyA(v, q);
269 alpha = dotProduct(v, q);
270 T d_new = alpha - mu * mu * d;
271 p = -p * d * mu / d_new;
272 scale(c, -mu);
273 AXPY(c, T(1), q);
274 AXPY(x, p, c);
275 AXPY(v, -alpha, q);
276 AXPY(v, -beta, q_old);
277 d = d_new;
278 }
279
280 if (check_residual)
281 {
282 UE_LOG(LogChaos, Verbose, TEXT("Lanczos used max iterations (%d). Residual = %g."), max_it, residual);
283 }
284 else
285 {
286 UE_LOG(LogChaos, Verbose, TEXT("Lanczos used max iterations (%d)."), max_it);
287 }
288}
289
290
291} // namespace Chaos
292
#define checkfSlow(expr, format,...)
Definition AssertionMacros.h:333
#define TEXT(x)
Definition Platform.h:1272
FPlatformTypes::int32 int32
A 32-bit signed integer.
Definition Platform.h:1125
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
#define UE_LOG(CategoryName, Verbosity, Format,...)
Definition LogMacros.h:270
Definition Vector.h:41
Definition Array.h:670
UE_REWRITE SizeType Num() const
Definition Array.h:1144
void SetNum(SizeType NewNum, EAllowShrinking AllowShrinking=UE::Core::Private::AllowShrinkingByDefault< AllocatorType >())
Definition Array.h:2308
Definition SkeletalMeshComponent.h:307
void CHAOS_API PhysicsParallelFor(int32 InNum, TFunctionRef< void(int32)> InCallable, bool bForceSingleThreaded=false)
Definition Parallel.cpp:55
void LanczosCG(Func multiplyA, TArray< T > &x, const TArray< T > &b, const int max_it, const T res=1e-4, bool check_residual=false, int min_parallel_batch_size=1000)
Definition Krylov.h:14
float v
Definition radaudio_mdct.cpp:62
static UE_FORCEINLINE_HINT float Sqrt(float Value)
Definition GenericPlatformMath.h:552