Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions .github/workflows/ci-sage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ on:
push:
tags:
- 'SageCI'
workflow_dispatch:

concurrency:
# Cancel previous runs of this workflow for the same branch
Expand Down Expand Up @@ -74,16 +75,16 @@ jobs:
PREFIX: /tmp/build
steps:
- name: Check out givaro
uses: actions/checkout@v2
uses: actions/checkout@v6
with:
path: build/pkgs/givaro/src
repository: linbox-team/givaro
- name: Check out ${{ env.SPKG }}
uses: actions/checkout@v2
uses: actions/checkout@v6
with:
path: build/pkgs/${{ env.SPKG }}/src
- name: Check out linbox
uses: actions/checkout@v2
uses: actions/checkout@v6
with:
path: build/pkgs/linbox/src
repository: linbox-team/linbox
Expand All @@ -107,7 +108,7 @@ jobs:
&& echo "sage-package create linbox --version git --tarball linbox-git.tar.gz --type=standard" >> upstream/update-pkgs.sh \
&& if [ -n "${{ env.REMOVE_PATCHES }}" ]; then echo "(cd ../build/pkgs/linbox/patches && rm -f ${{ env.REMOVE_PATCHES }}; :)" >> upstream/update-pkgs.sh; fi \
&& ls -l upstream/
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v7
with:
path: upstream
name: upstream
Expand All @@ -130,16 +131,3 @@ jobs:
# 'Package "sage-docker-..." is already associated with another repository.'
docker_push_repository: ghcr.io/${{ github.repository }}/fflas_ffpack_
needs: [dist]

macos:
uses: sagemath/sage/.github/workflows/macos.yml@develop
with:
osversion_xcodeversion_toxenv_tuples: >-
[["latest", "", "homebrew-macos-usrlocal-minimal"],
["latest", "", "homebrew-macos-usrlocal-standard"],
["13", "xcode_15.0", "homebrew-macos-usrlocal-standard"]]
targets: SAGE_CHECK=no SAGE_CHECK_givaro=yes SAGE_CHECK_fflas_ffpack=yes SAGE_CHECK_linbox=yes givaro fflas_ffpack linbox
sage_repo: sagemath/sage
sage_ref: develop
upstream_artifact: upstream
needs: [dist]
33 changes: 13 additions & 20 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,30 +1,23 @@
LUdivine-PLUQ
* Clean up of all base cases
* Only one routine, and automated switch to all implementations
* [DONE] Clean up of all base cases
* [DONE] Only one routine, and automated switch to all implementations

FTRTRI/FTRTRM
* Optimize base cases
* [DONE] Optimize base cases

Conversion double -> float for small moduli:
* should be done in each routine, not only gemm
* [DONE] should be done in each routine, not only gemm
(added to ftrsm, ftrmm, ftrsv)


Simplification of helpers:
* currently all mmhelpers have Amax,Amin,Bmax,Bmin, Cmax,Cmin,Outmax,
Outmin, and all related features for delayed reductions.
* this is not suited for other FieldTraits (say Generic,
Multiprec,...)
TODO:
- write a by-default minimal mmhelper
- specialize the mmhelper with delayedModular trait with all the
machinery
* The NeedPreaddreduction system is error-prone and ugly:
==> introduce AddHelpers
- carry max min outmax outmin info when used with a DelayedModular
FieldTraits
- decide when a mod is required in this case
- empty otherwise.
- Two bool params: add/sub switch, and inplace switch.
* [DONE] primary MMHelper template is now minimal (recLevel + parseq only)
MMHelperBounded base class provides all bounds tracking machinery
Specialized for LazyTag, DelayedTag, DefaultBoundedTag via inheritance
* [DONE] AddHelper<IsSub> replaces NeedPreAddReduction / NeedPreSubReduction
- LazyTag overload: overflow check with bounds tracking
- Generic overload: no-op
- IsSub template parameter: add/sub switch

CharPoly: How to handle polynomial arithmetic
* Option 1: generic representation, fixed Poly1Dom domain type, built when needed
Expand Down
109 changes: 51 additions & 58 deletions fflas-ffpack/fflas/fflas_fgemm.inl
Original file line number Diff line number Diff line change
Expand Up @@ -97,66 +97,59 @@ namespace FFLAS { namespace Protected{
}//FFLAS

namespace FFLAS{ namespace Protected{
/*! Bound propagation for an addition Op1 + Op2 in LazyTag mode.
 *
 * On entry [Op1min, Op1max] and [Op2min, Op2max] bound the two operands.
 * - If the bounds of the sum stay within
 *   [-WH.MaxStorableValue, WH.MaxStorableValue], [Outmin, Outmax] receives
 *   them and false is returned.
 * - Otherwise both operand intervals are reset to [WH.FieldMin, WH.FieldMax],
 *   [Outmin, Outmax] is set to [2*WH.FieldMin, 2*WH.FieldMax] and true is
 *   returned, telling the caller to reduce both operands before adding.
 *
 * NOTE(review): assumes the output bounds do not alias the operand bounds.
 */
template <class Field, class Element, class AlgoT, class ParSeqTrait>
inline bool NeedPreAddReduction (Element& Outmin, Element& Outmax,
                                 Element& Op1min, Element& Op1max,
                                 Element& Op2min, Element& Op2max,
                                 MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
    // Test for overflow *before* forming the sums: with a signed integral
    // Element, computing Op1max + Op2max (or Op1min + Op2min) first would be
    // undefined behaviour in exactly the case this test is meant to catch.
    if (WH.MaxStorableValue - Op1max < Op2max ||
        WH.MaxStorableValue + Op1min < -Op2min){
        // Reducing both Op1 and Op2
        Op1min = Op2min = WH.FieldMin;
        Op1max = Op2max = WH.FieldMax;
        Outmin = 2*WH.FieldMin;
        Outmax = 2*WH.FieldMax;
        return true;
    }
    // Both sums are now known to be representable.
    Outmin = Op1min + Op2min;
    Outmax = Op1max + Op2max;
    return false;
}

/*! Pre-addition check for an arbitrary mode ModeT.
 *
 * No bound tracking is performed: the operand bounds are neither read nor
 * modified, the output interval is set to [WH.FieldMin, WH.FieldMax] and a
 * reduction is never requested.
 *
 * @return always false.
 */
template <class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
inline bool NeedPreAddReduction (Element& Outmin, Element& Outmax,
                                 Element& Op1min, Element& Op1max,
                                 Element& Op2min, Element& Op2max,
                                 MMHelper<Field, AlgoT, ModeT, ParSeqTrait >& WH)
{
    // Operand bounds are part of the common signature but unused here.
    (void) Op1min; (void) Op1max;
    (void) Op2min; (void) Op2max;

    Outmin = WH.FieldMin;
    Outmax = WH.FieldMax;

    const bool reductionRequired = false;
    return reductionRequired;
}

/*! Bound propagation for a subtraction Op1 - Op2 in LazyTag mode.
 *
 * On entry [Op1min, Op1max] and [Op2min, Op2max] bound the two operands.
 * - If the bounds of the difference stay within
 *   [-WH.MaxStorableValue, WH.MaxStorableValue], [Outmin, Outmax] receives
 *   [Op1min - Op2max, Op1max - Op2min] and false is returned.
 * - Otherwise both operand intervals are reset to [WH.FieldMin, WH.FieldMax],
 *   [Outmin, Outmax] is set to
 *   [WH.FieldMin - WH.FieldMax, WH.FieldMax - WH.FieldMin] and true is
 *   returned, telling the caller to reduce both operands before subtracting.
 *
 * NOTE(review): assumes the output bounds do not alias the operand bounds.
 */
template <class Field, class Element, class AlgoT, class ParSeqTrait>
inline bool NeedPreSubReduction (Element& Outmin, Element& Outmax,
                                 Element& Op1min, Element& Op1max,
                                 Element& Op2min, Element& Op2max,
                                 MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
    // Test for overflow *before* forming the differences: with a signed
    // integral Element, computing them first would be undefined behaviour in
    // exactly the case this test is meant to catch.
    if (WH.MaxStorableValue - Op1max < -Op2min ||
        WH.MaxStorableValue - Op2max < -Op1min){
        // Reducing both Op1 and Op2
        Op1min = Op2min = WH.FieldMin;
        Op1max = Op2max = WH.FieldMax;
        Outmin = WH.FieldMin-WH.FieldMax;
        Outmax = -Outmin;
        return true;
    }
    // Both differences are now known to be representable.
    Outmin = Op1min - Op2max;
    Outmax = Op1max - Op2min;
    return false;
}
/*! AddHelper: unified helper for pre-add/sub reduction checks.
* Replaces NeedPreAddReduction (IsSub=false) and NeedPreSubReduction (IsSub=true).
* - For LazyTag: checks overflow and decides when freduce is needed.
* - For other modes: no-op, returns false.
*/
template<bool IsSub = false>
struct AddHelper {
// LazyTag overload: performs the overflow check.
//
// On entry [Op1min, Op1max] and [Op2min, Op2max] bound the two operands of
// Op1 - Op2 (IsSub == true) or Op1 + Op2 (IsSub == false).
// - If the result bounds stay within
//   [-WH.MaxStorableValue, WH.MaxStorableValue], they are stored in
//   [Outmin, Outmax] and false is returned.
// - Otherwise both operand intervals are reset to [WH.FieldMin, WH.FieldMax],
//   [Outmin, Outmax] is set to the bounds of the operation on reduced
//   operands, and true is returned: the caller must reduce both operands.
//
// The overflow test is done *before* forming the sums/differences: with a
// signed integral Element, computing them first would be undefined behaviour
// in exactly the case the test is meant to catch.
// NOTE(review): assumes the output bounds do not alias the operand bounds.
template <class Field, class Element, class AlgoT, class ParSeqTrait>
static inline bool needsReduction (Element& Outmin, Element& Outmax,
                                   Element& Op1min, Element& Op1max,
                                   Element& Op2min, Element& Op2max,
                                   MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
    if (IsSub) {
        if (WH.MaxStorableValue - Op1max < -Op2min ||
            WH.MaxStorableValue - Op2max < -Op1min){
            Op1min = Op2min = WH.FieldMin;
            Op1max = Op2max = WH.FieldMax;
            Outmin = WH.FieldMin-WH.FieldMax;
            Outmax = -Outmin;
            return true;
        }
        // Differences are now known to be representable.
        Outmin = Op1min - Op2max;
        Outmax = Op1max - Op2min;
    } else {
        if (WH.MaxStorableValue - Op1max < Op2max ||
            WH.MaxStorableValue + Op1min < -Op2min){
            Op1min = Op2min = WH.FieldMin;
            Op1max = Op2max = WH.FieldMax;
            Outmin = 2*WH.FieldMin;
            Outmax = 2*WH.FieldMax;
            return true;
        }
        // Sums are now known to be representable.
        Outmin = Op1min + Op2min;
        Outmax = Op1max + Op2max;
    }
    return false;
}

/*! Pre-subtraction check for an arbitrary mode ModeT.
 *
 * The operand bounds are neither read nor modified; the output interval is
 * set to [WH.FieldMin, WH.FieldMax] and a reduction is never requested.
 *
 * @return always false.
 */
template <class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
inline bool NeedPreSubReduction (Element& Outmin, Element& Outmax,
                                 Element& Op1min, Element& Op1max,
                                 Element& Op2min, Element& Op2max,
                                 MMHelper<Field, AlgoT, ModeT, ParSeqTrait >& WH)
{
    // Operand bounds are part of the common signature but unused here.
    (void) Op1min; (void) Op1max;
    (void) Op2min; (void) Op2max;

    // Setting the output bounds is required (per CP): it is needed when the
    // operation runs in a generic mode.
    Outmin = WH.FieldMin;
    Outmax = WH.FieldMax;

    const bool reductionRequired = false;
    return reductionRequired;
}
// Generic overload, for an arbitrary mode ModeT: no reduction is ever needed.
// The operand bounds are left untouched and the output interval is simply
// [WH.FieldMin, WH.FieldMax]. Always returns false.
template <class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
static inline bool needsReduction (Element& Outmin, Element& Outmax,
                                   Element& Op1min, Element& Op1max,
                                   Element& Op2min, Element& Op2max,
                                   MMHelper<Field, AlgoT, ModeT, ParSeqTrait >& WH)
{
    // Operand bounds are part of the common signature but unused here.
    (void) Op1min; (void) Op1max;
    (void) Op2min; (void) Op2max;

    Outmin = WH.FieldMin;
    Outmax = WH.FieldMax;

    const bool reductionRequired = false;
    return reductionRequired;
}
};

//Probable bug here due to overflow of int64_t
template<class Field, class Element, class AlgoT, class ParSeqTrait>
Expand Down
28 changes: 14 additions & 14 deletions fflas-ffpack/fflas/fflas_fgemm/schedule_winograd.inl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ namespace FFLAS { namespace BLAS3 {
// U5 = P3 + U4 in C12
// BIG TASK with 5 Addin function calls
// TASK(MODE(READWRITE(X15, C12) CONSTREFERENCE(F, DF, WH, U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax)),
if (Protected::NeedPreAddReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
TASK(MODE(READWRITE(X15) CONSTREFERENCE(F)),
pfreduce (F, mr, x1rd, X15, x1rd, NUM_THREADS);
);
Expand All @@ -228,7 +228,7 @@ namespace FFLAS { namespace BLAS3 {
);
CHECK_DEPENDENCIES;
// TASK(MODE(READWRITE(C12, C21) CONSTREFERENCE(F, DF, WH, U3Min, U3Max, U2Min, U2Max)),
if (Protected::NeedPreAddReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
);
Expand All @@ -242,7 +242,7 @@ namespace FFLAS { namespace BLAS3 {
);
CHECK_DEPENDENCIES;
// TASK(MODE(READWRITE(C12, C22) CONSTREFERENCE(F, DF, WH) VALUE(U4Min, U4Max, U2Min, U2Max)),
if (Protected::NeedPreAddReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
TASK(MODE(READWRITE(C22) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, C22, ldc, NUM_THREADS);
);
Expand All @@ -256,7 +256,7 @@ namespace FFLAS { namespace BLAS3 {
);
CHECK_DEPENDENCIES;
// TASK(MODE(READWRITE(C22, C21) CONSTREFERENCE(F, DF, WH) VALUE(U3Min, U3Max, U7Min, U7Max)),
if (Protected::NeedPreAddReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
TASK(MODE(READWRITE(C21) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, C21, ldc, NUM_THREADS);
);
Expand All @@ -269,7 +269,7 @@ namespace FFLAS { namespace BLAS3 {
pfaddin(DF,mr,nr,C21,ldc,C22,ldc, NUM_THREADS);
);
// TASK(MODE(READWRITE(C12, CC_11) CONSTREFERENCE(F, DF, WH) VALUE(U5Min, U5Max, U4Min, U4Max)),
if (Protected::NeedPreAddReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
);
Expand All @@ -286,7 +286,7 @@ namespace FFLAS { namespace BLAS3 {
// U6 = U3 - P4 in C21
DFElt U6Min, U6Max;
// TASK(MODE(READWRITE(C_11, C21) CONSTREFERENCE(F, DF, WH) VALUE(U6Min, U6Max, U3Min, U3Max)),
if (Protected::NeedPreSubReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
if (Protected::AddHelper<true>::needsReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
TASK(MODE(READWRITE(CC_11) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, C_11, nr, NUM_THREADS);
);
Expand All @@ -304,7 +304,7 @@ namespace FFLAS { namespace BLAS3 {
// U1 = P2 + P1 in C11
DFElt U1Min, U1Max;
// TASK(MODE(READWRITE(C11, X15/*, X14, X13, X12, X11*/) CONSTREFERENCE(F, DF, WH) VALUE(U1Min, U1Max)),
if (Protected::NeedPreAddReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
TASK(MODE(READWRITE(X15) CONSTREFERENCE(F)),
pfreduce (F, mr, nr, X15, x1rd, NUM_THREADS);
);
Expand Down Expand Up @@ -452,7 +452,7 @@ namespace FFLAS { namespace BLAS3 {
// U2 = P1 + P6 in C12 and
DFElt U2Min, U2Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
freduce (F, mr, nr, X1, nr);
freduce (F, mr, nr, C12, ldc);
}
Expand All @@ -461,7 +461,7 @@ namespace FFLAS { namespace BLAS3 {
// U3 = P7 + U2 in C21 and
DFElt U3Min, U3Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
freduce (F, mr, nr, C12, ldc);
freduce (F, mr, nr, C21, ldc);
}
Expand All @@ -471,7 +471,7 @@ namespace FFLAS { namespace BLAS3 {
// U4 = P5 + U2 in C12 and
DFElt U4Min, U4Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
if (Protected::AddHelper<false>::needsReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
freduce (F, mr, nr, C22, ldc);
freduce (F, mr, nr, C12, ldc);
}
Expand All @@ -480,7 +480,7 @@ namespace FFLAS { namespace BLAS3 {
// U7 = P5 + U3 in C22 and
DFElt U7Min, U7Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
freduce (F, mr, nr, C21, ldc);
freduce (F, mr, nr, C22, ldc);
}
Expand All @@ -489,7 +489,7 @@ namespace FFLAS { namespace BLAS3 {
// U5 = P3 + U4 in C12
DFElt U5Min, U5Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
freduce (F, mr, nr, C12, ldc);
freduce (F, mr, nr, C11, ldc);
}
Expand All @@ -508,7 +508,7 @@ namespace FFLAS { namespace BLAS3 {
// U6 = U3 - P4 in C21
DFElt U6Min, U6Max;
// This test will be optimized out
if (Protected::NeedPreSubReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
if (Protected::AddHelper<true>::needsReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
freduce (F, mr, nr, C11, ldc);
freduce (F, mr, nr, C21, ldc);
}
Expand All @@ -522,7 +522,7 @@ namespace FFLAS { namespace BLAS3 {
// U1 = P2 + P1 in C11
DFElt U1Min, U1Max;
// This test will be optimized out
if (Protected::NeedPreAddReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
freduce (F, mr, nr, X1, nr);
freduce (F, mr, nr, C11, ldc);
}
Expand Down
8 changes: 4 additions & 4 deletions fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc.inl
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ namespace FFLAS { namespace BLAS3 {

// U1 = P2 + P1 in C11
DFElt U1Min, U1Max;
if (Protected::NeedPreAddReduction (U1Min,U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
if (Protected::AddHelper<false>::needsReduction (U1Min,U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
freduce(F,mr,nr,X1,nr);
freduce(F,mr,nr,C11,ldc);
}
Expand All @@ -324,7 +324,7 @@ namespace FFLAS { namespace BLAS3 {

// U4 = U2 + C12 in C12
DFElt U4Min, U4Max;
if (Protected::NeedPreAddReduction (U4Min, U4Max, H6.Outmin, H6.Outmax, C12Min, C12Max, WH)){
if (Protected::AddHelper<false>::needsReduction (U4Min, U4Max, H6.Outmin, H6.Outmax, C12Min, C12Max, WH)){
freduce(F,mr,nr,C12,ldc);
freduce(F,mr,nr,X1,nr);
}
Expand Down Expand Up @@ -368,15 +368,15 @@ namespace FFLAS { namespace BLAS3 {

// U7 = U3 + C22 in C22
DFElt U7Min, U7Max;
if (Protected::NeedPreAddReduction (U7Min, U7Max, H7.Outmin, H7.Outmax, C22Min, C22Max, WH)){
if (Protected::AddHelper<false>::needsReduction (U7Min, U7Max, H7.Outmin, H7.Outmax, C22Min, C22Max, WH)){
freduce(F,mr,nr,X1,nr);
freduce(F,mr,nr,C22,ldc);
}
faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C22,ldc);

// U6 = U3 - P4 in C21
DFElt U6Min, U6Max;
if (Protected::NeedPreSubReduction(U6Min, U6Max, H7.Outmin, H7.Outmax, H4.Outmin, H4.Outmax, WH)){
if (Protected::AddHelper<true>::needsReduction(U6Min, U6Max, H7.Outmin, H7.Outmax, H4.Outmin, H4.Outmax, WH)){
freduce(F,mr,nr,X1,nr);
freduce(F,mr,nr,C21,ldc);
}
Expand Down
Loading