From e0104bae2f69a09fa95464fbfd32f7c0a71ccdbe Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 29 Nov 2024 08:39:29 +0800 Subject: [PATCH 01/93] Update --- go.mod | 28 +- go.sum | 55 +- lightning/pkg/importer/table_import.go | 31 +- pkg/lightning/mydump/parquet_parser.go | 923 ++++++++++---------- pkg/lightning/mydump/parquet_parser_test.go | 2 +- 5 files changed, 530 insertions(+), 509 deletions(-) diff --git a/go.mod b/go.mod index d32817b48736c..0b0be3555cdd3 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/YangKeao/go-mysql-driver v0.0.0-20240627104025-dd5589458cfa github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 + github.com/apache/arrow-go/v18 v18.0.0 github.com/apache/skywalking-eyes v0.4.0 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/ashanbrown/makezero v1.1.1 @@ -71,7 +72,7 @@ require ( github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df github.com/karamaru-alpha/copyloopvar v1.1.0 github.com/kisielk/errcheck v1.8.0 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.11 github.com/ks3sdklib/aws-sdk-go v1.2.9 github.com/lestrrat-go/jwx/v2 v2.0.21 github.com/mgechev/revive v1.5.1 @@ -140,7 +141,7 @@ require ( golang.org/x/time v0.7.0 golang.org/x/tools v0.27.0 google.golang.org/api v0.169.0 - google.golang.org/grpc v1.63.2 + google.golang.org/grpc v1.67.1 gopkg.in/yaml.v2 v2.4.0 gorm.io/driver/mysql v1.5.7 gorm.io/gorm v1.25.11 @@ -152,28 +153,29 @@ require ( require ( filippo.io/edwards25519 v1.1.0 // indirect - github.com/andybalholm/brotli v1.0.5 // indirect + github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/apache/arrow/go/v12 v12.0.1 // indirect github.com/cockroachdb/errors v1.11.1 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/getsentry/sentry-go v0.27.0 // indirect 
github.com/goccy/go-reflect v1.2.0 // indirect - github.com/google/flatbuffers v2.0.8+incompatible // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/qri-io/jsonpointer v0.1.1 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect ) require ( cloud.google.com/go v0.112.1 // indirect - cloud.google.com/go/compute/metadata v0.3.0 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect cloud.google.com/go/iam v1.1.6 // indirect cloud.google.com/go/pubsub v1.36.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0 // indirect @@ -185,7 +187,7 @@ require ( github.com/Masterminds/sprig/v3 v3.2.2 // indirect github.com/VividCortex/ewma v1.2.0 // indirect github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect - github.com/apache/thrift v0.16.0 // indirect + github.com/apache/thrift v0.21.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v2 v2.0.4 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect @@ -209,10 +211,10 @@ require ( github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect - github.com/goccy/go-json v0.10.2 // indirect + github.com/goccy/go-json v0.10.3 // indirect github.com/golang-jwt/jwt/v4 v4.5.1 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect - github.com/golang/glog v1.2.0 // indirect + 
github.com/golang/glog v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/licensecheck v0.3.1 // indirect @@ -303,9 +305,9 @@ require ( golang.org/x/mod v0.22.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect - google.golang.org/protobuf v1.34.2 + google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + google.golang.org/protobuf v1.35.1 gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index e4704e0895524..ae1c56e2cf826 100644 --- a/go.sum +++ b/go.sum @@ -21,8 +21,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= -cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod 
h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/iam v1.1.6 h1:bEa06k05IO4f4uJonbB5iAgKTPpABy1ayxaIZV/GHVc= @@ -100,16 +100,19 @@ github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1L github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 h1:Q/yk4z/cHUVZfgTqtD09qeYBxHwshQAjVRX73qs8UH0= github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581/go.mod h1:RcDobYh8k5VP6TNybz9m++gL3ijVI5wueVr0EM10VsU= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= -github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/apache/arrow-go/v18 v18.0.0 h1:1dBDaSbH3LtulTyOVYaBCHO3yVRwjV+TZaqn3g6V7ZM= +github.com/apache/arrow-go/v18 v18.0.0/go.mod h1:t6+cWRSmKgdQ6HsxisQjok+jBpKGhRDiqcf3p0p/F+A= github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg= github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw= github.com/apache/skywalking-eyes v0.4.0 h1:O13kdRU6FCEZevfD01mdhTgCZLLfPZIQ0GXZrLl7FpQ= github.com/apache/skywalking-eyes v0.4.0/go.mod h1:WblDbBgOLsLN0FJEBa9xj6PhuUA/J6spKYVTG4/F8Ls= github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= 
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= @@ -304,8 +307,8 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/goccy/go-reflect v1.2.0 h1:O0T8rZCuNmGXewnATuKYnkL0xm6o8UNOJZd/gOkb9ms= github.com/goccy/go-reflect v1.2.0/go.mod h1:n0oYZn8VcV2CkWTxi8B9QjkCoq6GTtCEdfmR66YhFtE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -320,8 +323,8 @@ github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17w github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.0 h1:uCdmnmatrKCgMBlM4rMuJZWOkPDqdbZPnrMXDY4gI68= -github.com/golang/glog v1.2.0/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= +github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= 
+github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -374,8 +377,9 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -537,13 +541,14 @@ github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod 
h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -654,8 +659,9 @@ github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2/go.mod h1:iIss55rK github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= -github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pingcap/badger v1.5.1-0.20241015064302-38533b6cbf8d h1:eHcokyHxm7HVM+7+Qy1zZwC7NhX9wVNX8oQDcSZw1qI= github.com/pingcap/badger 
v1.5.1-0.20241015064302-38533b6cbf8d/go.mod h1:KiO2zumBCWx7yoVYoFRpb+DNrwEPk1pR1LF7NvOACMQ= github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= @@ -858,6 +864,8 @@ github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 h1:a742S4V5A15F93smuVxA60LQWsrCnN8bKeWDBARU1/k= github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -1276,8 +1284,9 @@ gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJ gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= -gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod 
h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= @@ -1342,10 +1351,10 @@ google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUEr4jDysRDLrm4PHePlge4v4TGAlxY= google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= -google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= -google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 h1:AgADTJarZTBqgjiUzRgfaBchgYB3/WFTC80GPwsMcRI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -1364,8 +1373,8 @@ google.golang.org/grpc v1.33.1/go.mod 
h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTp google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= -google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= -google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -1383,8 +1392,8 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 17f342fc535c9..20c5a0b4dfd1c 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -1196,40 +1196,13 @@ func (tr *TableImporter) postProcess( return true, nil } +// TODO(joechenrh): remove this function func getChunkCompressedSizeForParquet( ctx context.Context, chunk *checkpoints.ChunkCheckpoint, store storage.ExternalStorage, ) (int64, error) { - reader, err := mydump.OpenReader(ctx, &chunk.FileMeta, store, storage.DecompressConfig{ - ZStdDecodeConcurrency: 1, - }) - if err != nil { - return 0, errors.Trace(err) - } - parser, err := mydump.NewParquetParser(ctx, store, reader, chunk.FileMeta.Path) - if err != nil { - _ = reader.Close() - return 0, errors.Trace(err) - } - //nolint: errcheck - defer parser.Close() - err = parser.Reader.ReadFooter() - if err != nil { - return 0, errors.Trace(err) - } - rowGroups := parser.Reader.Footer.GetRowGroups() - var maxRowGroupSize int64 - for _, rowGroup := range rowGroups { - var rowGroupSize int64 - columnChunks := rowGroup.GetColumns() - for _, columnChunk := range columnChunks { - columnChunkSize := columnChunk.MetaData.GetTotalCompressedSize() - rowGroupSize += columnChunkSize - } - maxRowGroupSize = max(maxRowGroupSize, rowGroupSize) - } - return maxRowGroupSize, nil + return 100, nil } func updateStatsMeta(ctx context.Context, db *sql.DB, tableID int64, count int) { diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 522772d7ccaf6..26af68bd2af24 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -17,11 +17,9 @@ package mydump import ( "bytes" "context" - "encoding/binary" "fmt" "io" "math/big" - "reflect" "strings" "time" @@ -29,10 +27,11 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" 
"github.com/pingcap/tidb/pkg/types" - "github.com/xitongsys/parquet-go/parquet" - preader "github.com/xitongsys/parquet-go/reader" "github.com/xitongsys/parquet-go/source" - "go.uber.org/zap" + + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/file" + "github.com/apache/arrow-go/v18/parquet/schema" ) const ( @@ -49,22 +48,466 @@ const ( timeLayout = "2006-01-02 15:04:05.999999" ) +// Buffers to store data read from columns. +// Declare here to avoid frequent allocation. +type readBuffer struct { + fixedLenArrayBuffer []parquet.FixedLenByteArray + float32Buffer []float32 + float64Buffer []float64 + byteArrayBuffer []parquet.ByteArray + int32Buffer []int32 + int64Buffer []int64 + int96Buffer []parquet.Int96 + boolBuffer []bool +} + +// convertedType is older representation of the logical type in parquet +// ref: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md +type convertedType struct { + converted schema.ConvertedType + decimalMeta schema.DecimalMetadata +} + +func binaryToDecimalStr(rawBytes []byte, scale int) string { + negative := rawBytes[0] > 127 + if negative { + for i := 0; i < len(rawBytes); i++ { + rawBytes[i] = ^rawBytes[i] + } + for i := len(rawBytes) - 1; i >= 0; i-- { + rawBytes[i]++ + if rawBytes[i] != 0 { + break + } + } + } + + intValue := big.NewInt(0) + intValue = intValue.SetBytes(rawBytes) + val := fmt.Sprintf("%0*d", scale, intValue) + dotIndex := len(val) - scale + var res strings.Builder + if negative { + res.WriteByte('-') + } + if dotIndex == 0 { + res.WriteByte('0') + } else { + res.WriteString(val[:dotIndex]) + } + if scale > 0 { + res.WriteByte('.') + res.WriteString(val[dotIndex:]) + } + return res.String() +} + +func formatTime(v int64, unit string, format, utcFormat string) string { + var t time.Time + switch unit { + case "MICROS": + t = time.UnixMicro(v) + case "MILLIS": + t = time.UnixMilli(v) + default: + t = time.Unix(0, v) + } + + return t.UTC().Format(utcFormat) +} + // 
ParquetParser parses a parquet file for import // It implements the Parser interface. type ParquetParser struct { - Reader *preader.ParquetReader - columns []string - columnMetas []*parquet.SchemaElement - rows []any - readRows int64 - curStart int64 - curIndex int - lastRow Row - logger log.Logger + reader *file.Reader + colMetas []convertedType + columnNames []string + + colReaders []file.ColumnChunkReader + colBuffers []readBuffer + rows [][]types.Datum + curIdx int + avail int + + curRowGroup int + totalRowGroup int + + curRowInGroup int + totalRowsInGroup int + curRows int + totalRows int + + lastRow Row + logger log.Logger readSeekCloser ReadSeekCloser } +func (p *ParquetParser) setStringData(readNum, col, offset int) { + buf := p.colBuffers[col].byteArrayBuffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetString(buf[i].String(), "utf8mb4_bin") + } +} + +func (p *ParquetParser) setInt32Data(readNum, col, offset int) { + buf := p.colBuffers[col].int32Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetInt64(int64(buf[i])) + } +} + +func (p *ParquetParser) setUint32Data(readNum, col, offset int) { + buf := p.colBuffers[col].int32Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetUint64(uint64(buf[i])) + } +} + +func (p *ParquetParser) setInt64Data(readNum, col, offset int) { + buf := p.colBuffers[col].int64Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetInt64(int64(buf[i])) + } +} + +func (p *ParquetParser) setUint64Data(readNum, col, offset int) { + buf := p.colBuffers[col].int64Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetUint64(uint64(buf[i])) + } +} + +func (p *ParquetParser) setTimeMillisData(readNum, col, offset int) { + buf := p.colBuffers[col].int32Buffer + for i := 0; i < readNum; i++ { + timeStr := formatTime(int64(buf[i]), "MILLIS", "15:04:05.999999", "15:04:05.999999Z") + p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + } +} + +func (p *ParquetParser) 
setTimeMicrosData(readNum, col, offset int) { + buf := p.colBuffers[col].int32Buffer + for i := 0; i < readNum; i++ { + timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z") + p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + } +} + +func (p *ParquetParser) setTimestampMillisData(readNum, col, offset int) { + buf := p.colBuffers[col].int64Buffer + for i := 0; i < readNum; i++ { + timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout) + p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + } +} + +func (p *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { + buf := p.colBuffers[col].int64Buffer + for i := 0; i < readNum; i++ { + timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout) + p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + } +} + +func (p *ParquetParser) setDateData(readNum, col, offset int) { + buf := p.colBuffers[col].int32Buffer + for i := 0; i < readNum; i++ { + dateStr := time.Unix(int64(buf[i])*86400, 0).Format(time.DateOnly) + p.rows[offset+i][col].SetString(dateStr, "utf8mb4_bin") + } +} + +func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { + colTp := p.colReaders[col].Type() + decimal := p.colMetas[col].decimalMeta + + for i := 0; i < readNum; i++ { + if colTp == parquet.Types.Int32 || colTp == parquet.Types.Int32 { + v := p.colBuffers[col].int64Buffer[i] + if colTp == parquet.Types.Int32 { + v = int64(p.colBuffers[col].int32Buffer[i]) + } + if !decimal.IsSet || decimal.Scale == 0 { + p.rows[offset+i][col].SetInt64(v) + continue + } + minLen := decimal.Scale + 1 + if v < 0 { + minLen++ + } + val := fmt.Sprintf("%0*d", minLen, v) + dotIndex := len(val) - int(decimal.Scale) + p.rows[offset+i][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") + } else if colTp == parquet.Types.FixedLenByteArray { + s := binaryToDecimalStr(p.colBuffers[col].fixedLenArrayBuffer[i], int(decimal.Scale)) + p.rows[offset+i][col].SetString(s, 
"utf8mb4_bin") + } else { + s := binaryToDecimalStr(p.colBuffers[col].byteArrayBuffer[i], int(decimal.Scale)) + p.rows[offset+i][col].SetString(s, "utf8mb4_bin") + } + } + return nil +} + +func (p *ParquetParser) setBoolData(readNum, col, offset int) { + buf := p.colBuffers[col].boolBuffer + for i := 0; i < readNum; i++ { + if buf[i] { + p.rows[offset+i][col].SetUint64(1) + } else { + p.rows[offset+i][col].SetUint64(0) + } + } +} + +func (p *ParquetParser) setInt96Data(readNum, col, offset int) { + // FYI: https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 + // INT96 timestamp layout + // -------------------------- + // | 64 bit | 32 bit | + // --------------------------- + // | nano sec | julian day | + // --------------------------- + // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, + // where dt is a negative number but still legal in the context of Go. + // But it will cause errors or potential data inconsistency when importing. 
+ buf := p.colBuffers[col].int96Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetString(buf[i].ToTime().Format(utcTimeLayout), "utf8mb4_bin") + } +} + +func (p *ParquetParser) Init() error { + p.curRowGroup, p.totalRowGroup = -1, p.reader.NumRowGroups() + + p.totalRows = int(p.reader.MetaData().NumRows) + + numCols := p.reader.MetaData().Schema.NumColumns() + p.colReaders = make([]file.ColumnChunkReader, numCols) + p.colBuffers = make([]readBuffer, numCols) + p.rows = make([][]types.Datum, batchReadRowSize) + for i := range p.rows { + p.rows[i] = make([]types.Datum, numCols) + } + + return nil +} + +func (p *ParquetParser) GetRow() ([]types.Datum, error) { + if p.curIdx >= p.avail { + read, err := p.readRows(batchReadRowSize) + if err != nil { + return nil, errors.Trace(err) + } + if read == 0 { + return nil, nil + } + p.curIdx, p.avail = 0, read + } + + row := p.rows[p.curIdx] + p.curIdx++ + return row, nil +} + +func (p *ParquetParser) readRows(num int) (int, error) { + readNum := min(num, p.totalRows-p.curRows) + if readNum == 0 { + return 0, nil + } + + read := 0 + for read < readNum { + // Move to next row group + if p.curRowInGroup == p.totalRowsInGroup { + p.curRowGroup++ + rowGroupReader := p.reader.RowGroup(p.curRowGroup) + var err error + for c := 0; c < len(p.colReaders); c++ { + p.colReaders[c], err = rowGroupReader.Column(c) + if err != nil { + return 0, errors.Trace(err) + } + } + p.curRowInGroup, p.totalRowsInGroup = 0, int(rowGroupReader.NumRows()) + } + + // Read in this group + curRead := min(readNum-read, p.totalRowsInGroup-p.curRowInGroup) + _, err := p.readInGroup(curRead, read) + if err != nil { + return 0, errors.Trace(err) + } + read += curRead + p.curRowInGroup += curRead + } + + p.curRows += readNum + return readNum, nil +} + +// Read num rows in current row group and store results +func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { + var ( + err error + total int64 + ) + + // Read data into buffers 
first + req := int64(num) + for i, col := range p.colReaders { + buf := p.colBuffers[i] + switch col.Type() { + case parquet.Types.FixedLenByteArray: + total, _, err = col.(*file.FixedLenByteArrayColumnChunkReader).ReadBatch(req, buf.fixedLenArrayBuffer, nil, nil) + case parquet.Types.Float: + total, _, err = col.(*file.Float32ColumnChunkReader).ReadBatch(req, buf.float32Buffer, nil, nil) + case parquet.Types.Double: + total, _, err = col.(*file.Float64ColumnChunkReader).ReadBatch(req, buf.float64Buffer, nil, nil) + case parquet.Types.ByteArray: + total, _, err = col.(*file.ByteArrayColumnChunkReader).ReadBatch(req, buf.byteArrayBuffer, nil, nil) + case parquet.Types.Int32: + total, _, err = col.(*file.Int32ColumnChunkReader).ReadBatch(req, buf.int32Buffer, nil, nil) + case parquet.Types.Int64: + total, _, err = col.(*file.Int64ColumnChunkReader).ReadBatch(req, buf.int64Buffer, nil, nil) + case parquet.Types.Int96: + total, _, err = col.(*file.Int96ColumnChunkReader).ReadBatch(req, buf.int96Buffer, nil, nil) + case parquet.Types.Boolean: + total, _, err = col.(*file.BooleanColumnChunkReader).ReadBatch(req, buf.boolBuffer, nil, nil) + } + + if err != nil { + return 0, errors.Trace(err) + } + + // Parse data according to converted type + if col.Type() == parquet.Types.Boolean { + p.setBoolData(num, i, dataOffset) + continue + } else if col.Type() == parquet.Types.Int96 { + p.setInt96Data(num, i, dataOffset) + continue + } + + meta := p.colMetas[i] + + switch meta.converted { + case schema.ConvertedTypes.BSON: + case schema.ConvertedTypes.JSON: + case schema.ConvertedTypes.UTF8: + case schema.ConvertedTypes.Enum: + p.setStringData(num, i, dataOffset) + case schema.ConvertedTypes.Int8: + case schema.ConvertedTypes.Int16: + case schema.ConvertedTypes.Int32: + p.setInt32Data(num, i, dataOffset) + case schema.ConvertedTypes.Uint8: + case schema.ConvertedTypes.Uint16: + case schema.ConvertedTypes.Uint32: + p.setUint32Data(num, i, dataOffset) + case 
schema.ConvertedTypes.Int64: + p.setInt64Data(num, i, dataOffset) + case schema.ConvertedTypes.Uint64: + p.setUint64Data(num, i, dataOffset) + case schema.ConvertedTypes.TimeMillis: + p.setTimeMillisData(num, i, dataOffset) + case schema.ConvertedTypes.TimeMicros: + p.setTimeMicrosData(num, i, dataOffset) + case schema.ConvertedTypes.TimestampMillis: + p.setTimestampMillisData(num, i, dataOffset) + case schema.ConvertedTypes.TimestampMicros: + p.setTimestampMicrosData(num, i, dataOffset) + case schema.ConvertedTypes.Date: + p.setDateData(num, i, dataOffset) + case schema.ConvertedTypes.Decimal: + p.setDecimalData(num, i, dataOffset) + } + } + + return int(total), err +} + +// Pos returns the current row number of the parquet file +func (p *ParquetParser) Pos() (pos int64, rowID int64) { + return int64(p.curRows), p.lastRow.RowID +} + +// SetPos sets the position in a parquet file. +// It implements the Parser interface. +func (p *ParquetParser) SetPos(pos int64, rowID int64) error { + p.lastRow.RowID = rowID + if pos < int64(p.curRows) { + panic("don't support seek back yet") + } + + read := int(pos) - p.curRows + _, err := p.readRows(read) + return errors.Trace(err) +} + +// ScannedPos implements the Parser interface. +// For parquet it's parquet file's reader current position. +func (pp *ParquetParser) ScannedPos() (int64, error) { + return pp.readSeekCloser.Seek(0, io.SeekCurrent) +} + +// Close closes the parquet file of the parser. +// It implements the Parser interface. +func (pp *ParquetParser) Close() error { + return pp.reader.Close() +} + +// ReadRow reads a row in the parquet file by the parser. +// It implements the Parser interface. +func (p *ParquetParser) ReadRow() error { + p.lastRow.RowID++ + p.lastRow.Length = 0 + row, err := p.GetRow() + if err != nil { + return errors.Trace(err) + } + if row == nil { + return io.EOF + } + p.lastRow.Row = row + p.lastRow.Length = 0 + return nil +} + +// LastRow gets the last row parsed by the parser. 
+// It implements the Parser interface. +func (pp *ParquetParser) LastRow() Row { + return pp.lastRow +} + +// RecycleRow implements the Parser interface. +func (*ParquetParser) RecycleRow(_ Row) { +} + +// Columns returns the _lower-case_ column names corresponding to values in +// the LastRow. +func (pp *ParquetParser) Columns() []string { + return pp.columnNames +} + +// SetColumns sets restored column names to parser +func (*ParquetParser) SetColumns(_ []string) { + // just do nothing +} + +// SetLogger sets the logger used in the parser. +// It implements the Parser interface. +func (pp *ParquetParser) SetLogger(l log.Logger) { + pp.logger = l +} + +// SetRowID sets the rowID in a parquet file when we start a compressed file. +// It implements the Parser interface. +func (pp *ParquetParser) SetRowID(rowID int64) { + pp.lastRow.RowID = rowID +} + // readerWrapper is a used for implement `source.ParquetFile` type readerWrapper struct { ReadSeekCloser @@ -162,30 +605,14 @@ func OpenParquetReader( // readParquetFileRowCount reads the parquet file row count. // It is a special func to fetch parquet file row count fast. +// TODO(joechenrh): implement this func readParquetFileRowCount( ctx context.Context, store storage.ExternalStorage, r storage.ReadSeekCloser, path string, ) (int64, error) { - wrapper := &readerWrapper{ - ReadSeekCloser: r, - store: store, - ctx: ctx, - path: path, - } - var err error - res := new(preader.ParquetReader) - res.NP = 1 - res.PFile = wrapper - if err = res.ReadFooter(); err != nil { - return 0, err - } - numRows := res.Footer.NumRows - if err = wrapper.Close(); err != nil { - return 0, err - } - return numRows, nil + return 0, nil } // ReadParquetFileRowCountByFile reads the parquet file row count through fileMeta. 
@@ -223,427 +650,37 @@ func NewParquetParser( } } - // FIXME: need to bench what the best value for the concurrent reader number - reader, err := preader.NewParquetReader(wrapper, nil, 2) + // TODO(joechenrh): use r + reader, err := file.OpenParquetFile(path, false) if err != nil { return nil, errors.Trace(err) } - columns := make([]string, 0, len(reader.Footer.Schema)-1) - columnMetas := make([]*parquet.SchemaElement, 0, len(reader.Footer.Schema)-1) - for i, c := range reader.SchemaHandler.SchemaElements { - if c.GetNumChildren() == 0 { - // we need to use the raw name, SchemaElement.Name might be prefixed with PARGO_PERFIX_ - columns = append(columns, strings.ToLower(reader.SchemaHandler.GetExName(i))) - // transfer old ConvertedType to LogicalType - columnMeta := c - if c.ConvertedType != nil && c.LogicalType == nil { - newMeta := *c - columnMeta = &newMeta - if err := convertToLogicType(columnMeta); err != nil { - return nil, err - } - } - columnMetas = append(columnMetas, columnMeta) - } - } + fileSchema := reader.MetaData().Schema + columnMetas := make([]convertedType, fileSchema.NumColumns()) + columnNames := make([]string, 0, fileSchema.NumColumns()) - return &ParquetParser{ - Reader: reader, - columns: columns, - columnMetas: columnMetas, - logger: log.FromContext(ctx), - readSeekCloser: wrapper, - }, nil -} - -func convertToLogicType(se *parquet.SchemaElement) error { - logicalType := &parquet.LogicalType{} - switch *se.ConvertedType { - case parquet.ConvertedType_UTF8: - logicalType.STRING = &parquet.StringType{} - case parquet.ConvertedType_ENUM: - logicalType.ENUM = &parquet.EnumType{} - case parquet.ConvertedType_DECIMAL: - logicalType.DECIMAL = &parquet.DecimalType{ - Scale: *se.Scale, - Precision: *se.Precision, - } - case parquet.ConvertedType_DATE: - logicalType.DATE = &parquet.DateType{} - case parquet.ConvertedType_TIME_MILLIS: - logicalType.TIME = &parquet.TimeType{ - IsAdjustedToUTC: true, - Unit: &parquet.TimeUnit{ - MILLIS: 
parquet.NewMilliSeconds(), - }, - } - case parquet.ConvertedType_TIME_MICROS: - logicalType.TIME = &parquet.TimeType{ - IsAdjustedToUTC: true, - Unit: &parquet.TimeUnit{ - MICROS: parquet.NewMicroSeconds(), - }, - } - case parquet.ConvertedType_TIMESTAMP_MILLIS: - logicalType.TIMESTAMP = &parquet.TimestampType{ - IsAdjustedToUTC: true, - Unit: &parquet.TimeUnit{ - MILLIS: parquet.NewMilliSeconds(), - }, - } - case parquet.ConvertedType_TIMESTAMP_MICROS: - logicalType.TIMESTAMP = &parquet.TimestampType{ - IsAdjustedToUTC: true, - Unit: &parquet.TimeUnit{ - MICROS: parquet.NewMicroSeconds(), - }, - } - case parquet.ConvertedType_UINT_8: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 8, - IsSigned: false, - } - case parquet.ConvertedType_UINT_16: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 16, - IsSigned: false, - } - case parquet.ConvertedType_UINT_32: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 32, - IsSigned: false, - } - case parquet.ConvertedType_UINT_64: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 64, - IsSigned: false, - } - case parquet.ConvertedType_INT_8: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 8, - IsSigned: true, - } - case parquet.ConvertedType_INT_16: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 16, - IsSigned: true, - } - case parquet.ConvertedType_INT_32: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 32, - IsSigned: true, - } - case parquet.ConvertedType_INT_64: - logicalType.INTEGER = &parquet.IntType{ - BitWidth: 64, - IsSigned: true, - } - case parquet.ConvertedType_JSON: - logicalType.JSON = &parquet.JsonType{} - case parquet.ConvertedType_BSON: - logicalType.BSON = &parquet.BsonType{} - // case parquet.ConvertedType_INTERVAL, parquet.ConvertedType_MAP, parquet.ConvertedType_MAP_KEY_VALUE, parquet.ConvertedType_LIST: - default: - return errors.Errorf("unsupported type: '%s'", *se.ConvertedType) - } - se.LogicalType = logicalType - return nil -} - -// Pos returns the currently row 
number of the parquet file -func (pp *ParquetParser) Pos() (pos int64, rowID int64) { - return pp.curStart + int64(pp.curIndex), pp.lastRow.RowID -} - -// SetPos sets the position in a parquet file. -// It implements the Parser interface. -func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { - if pos < pp.curStart { - panic("don't support seek back yet") - } - pp.lastRow.RowID = rowID - - if pos < pp.curStart+int64(len(pp.rows)) { - pp.curIndex = int(pos - pp.curStart) - pp.readRows = pos - return nil - } - - if pos > pp.curStart+int64(len(pp.rows)) { - if err := pp.Reader.SkipRows(pos - pp.curStart - int64(len(pp.rows))); err != nil { - return errors.Trace(err) - } - } - pp.curStart = pos - pp.readRows = pos - pp.curIndex = 0 - if len(pp.rows) > 0 { - pp.rows = pp.rows[:0] - } - - return nil -} - -// ScannedPos implements the Parser interface. -// For parquet it's parquet file's reader current position. -func (pp *ParquetParser) ScannedPos() (int64, error) { - return pp.readSeekCloser.Seek(0, io.SeekCurrent) -} - -// Close closes the parquet file of the parser. -// It implements the Parser interface. -func (pp *ParquetParser) Close() error { - pp.Reader.ReadStop() - return pp.Reader.PFile.Close() -} - -// ReadRow reads a row in the parquet file by the parser. -// It implements the Parser interface. 
-func (pp *ParquetParser) ReadRow() error { - pp.lastRow.RowID++ - pp.lastRow.Length = 0 - if pp.curIndex >= len(pp.rows) { - if pp.readRows >= pp.Reader.GetNumRows() { - return io.EOF - } - count := batchReadRowSize - if pp.Reader.GetNumRows()-pp.readRows < int64(count) { - count = int(pp.Reader.GetNumRows() - pp.readRows) - } - - var err error - pp.rows, err = pp.Reader.ReadByNumber(count) - if err != nil { - return errors.Trace(err) - } - pp.curStart = pp.readRows - pp.readRows += int64(len(pp.rows)) - pp.curIndex = 0 - } - - row := pp.rows[pp.curIndex] - pp.curIndex++ - - v := reflect.ValueOf(row) - length := v.NumField() - if cap(pp.lastRow.Row) < length { - pp.lastRow.Row = make([]types.Datum, length) - } else { - pp.lastRow.Row = pp.lastRow.Row[:length] - } - for i := 0; i < length; i++ { - pp.lastRow.Length += getDatumLen(v.Field(i)) - if err := setDatumValue(&pp.lastRow.Row[i], v.Field(i), pp.columnMetas[i], pp.logger); err != nil { - return err - } - } - return nil -} - -func getDatumLen(v reflect.Value) int { - if v.Kind() == reflect.Ptr { - if v.IsNil() { - return 0 - } - return getDatumLen(v.Elem()) - } - if v.Kind() == reflect.String { - return len(v.String()) - } - return 8 -} + for i := range columnMetas { + desc := reader.MetaData().Schema.Column(i) + columnNames = append(columnNames, strings.ToLower(desc.Name())) -// convert a parquet value to Datum -// -// See: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md -func setDatumValue(d *types.Datum, v reflect.Value, meta *parquet.SchemaElement, logger log.Logger) error { - switch v.Kind() { - case reflect.Bool: - if v.Bool() { - d.SetUint64(1) + logicalType := desc.LogicalType() + if logicalType.IsValid() { + columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() } else { - d.SetUint64(0) - } - case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - d.SetUint64(v.Uint()) - case reflect.Int8, reflect.Int16: - d.SetInt64(v.Int()) - case 
reflect.Int32, reflect.Int64: - return setDatumByInt(d, v.Int(), meta) - case reflect.String: - setDatumByString(d, v.String(), meta) - case reflect.Float32, reflect.Float64: - d.SetFloat64(v.Float()) - case reflect.Ptr: - if !v.IsNil() { - return setDatumValue(d, v.Elem(), meta, logger) - } - d.SetNull() - default: - logger.Error("unknown value", zap.Stringer("kind", v.Kind()), - zap.String("type", v.Type().Name()), zap.Reflect("value", v.Interface())) - return errors.Errorf("unknown value: %v", v) - } - return nil -} - -func setDatumByString(d *types.Datum, v string, meta *parquet.SchemaElement) { - if meta.LogicalType != nil && meta.LogicalType.DECIMAL != nil { - v = binaryToDecimalStr([]byte(v), int(meta.LogicalType.DECIMAL.Scale)) - } - if meta.Type != nil && *meta.Type == parquet.Type_INT96 && len(v) == 96/8 { - ts := int96ToTime([]byte(v)) - ts = ts.UTC() - v = ts.Format(utcTimeLayout) - } - d.SetString(v, "utf8mb4_bin") -} - -func binaryToDecimalStr(rawBytes []byte, scale int) string { - negative := rawBytes[0] > 127 - if negative { - for i := 0; i < len(rawBytes); i++ { - rawBytes[i] = ^rawBytes[i] - } - for i := len(rawBytes) - 1; i >= 0; i-- { - rawBytes[i]++ - if rawBytes[i] != 0 { - break - } + columnMetas[i].converted = desc.ConvertedType() + columnMetas[i].decimalMeta = desc.SchemaNode().(*schema.PrimitiveNode).DecimalMetadata() } } - intValue := big.NewInt(0) - intValue = intValue.SetBytes(rawBytes) - val := fmt.Sprintf("%0*d", scale, intValue) - dotIndex := len(val) - scale - var res strings.Builder - if negative { - res.WriteByte('-') - } - if dotIndex == 0 { - res.WriteByte('0') - } else { - res.WriteString(val[:dotIndex]) - } - if scale > 0 { - res.WriteByte('.') - res.WriteString(val[dotIndex:]) - } - return res.String() -} - -// when the value type is int32/int64, convert to value to target logical type in tidb -func setDatumByInt(d *types.Datum, v int64, meta *parquet.SchemaElement) error { - if meta.ConvertedType == nil && meta.LogicalType 
== nil { - d.SetInt64(v) - return nil - } - - logicalType := meta.LogicalType - switch { - case logicalType.DECIMAL != nil: - if logicalType.DECIMAL.Scale == 0 { - d.SetInt64(v) - return nil - } - minLen := logicalType.DECIMAL.Scale + 1 - if v < 0 { - minLen++ - } - val := fmt.Sprintf("%0*d", minLen, v) - dotIndex := len(val) - int(*meta.Scale) - d.SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") - case logicalType.DATE != nil: - dateStr := time.Unix(v*86400, 0).Format(time.DateOnly) - d.SetString(dateStr, "utf8mb4_bin") - case logicalType.TIMESTAMP != nil: - // convert all timestamp types (datetime/timestamp) to string - timeStr := formatTime(v, logicalType.TIMESTAMP.Unit, timeLayout, - utcTimeLayout, logicalType.TIMESTAMP.IsAdjustedToUTC) - d.SetString(timeStr, "utf8mb4_bin") - case logicalType.TIME != nil: - // convert all timestamp types (datetime/timestamp) to string - timeStr := formatTime(v, logicalType.TIME.Unit, "15:04:05.999999", "15:04:05.999999Z", - logicalType.TIME.IsAdjustedToUTC) - d.SetString(timeStr, "utf8mb4_bin") - default: - d.SetInt64(v) - } - return nil -} - -func formatTime(v int64, units *parquet.TimeUnit, format, utcFormat string, utc bool) string { - var t time.Time - if units.MICROS != nil { - t = time.UnixMicro(v) - } else if units.MILLIS != nil { - t = time.UnixMilli(v) - } else { - // nano - t = time.Unix(0, v) - } - t = t.UTC() - if utc { - return t.Format(utcFormat) + parser := &ParquetParser{ + reader: reader, + colMetas: columnMetas, + columnNames: columnNames, + logger: log.FromContext(ctx), + readSeekCloser: wrapper, } - return t.Format(format) -} - -// LastRow gets the last row parsed by the parser. -// It implements the Parser interface. -func (pp *ParquetParser) LastRow() Row { - return pp.lastRow -} - -// RecycleRow implements the Parser interface. -func (*ParquetParser) RecycleRow(_ Row) { -} - -// Columns returns the _lower-case_ column names corresponding to values in -// the LastRow. 
-func (pp *ParquetParser) Columns() []string { - return pp.columns -} - -// SetColumns set restored column names to parser -func (*ParquetParser) SetColumns(_ []string) { - // just do nothing -} - -// SetLogger sets the logger used in the parser. -// It implements the Parser interface. -func (pp *ParquetParser) SetLogger(l log.Logger) { - pp.logger = l -} - -// SetRowID sets the rowID in a parquet file when we start a compressed file. -// It implements the Parser interface. -func (pp *ParquetParser) SetRowID(rowID int64) { - pp.lastRow.RowID = rowID -} + parser.Init() -func jdToTime(jd int32, nsec int64) time.Time { - sec := int64(jd-jan011970) * secPerDay - // it's fine not to check the value of nsec - // because it's legall even though it exceeds the maximum. - // See TestNsecOutSideRange. - return time.Unix(sec, nsec) -} - -// FYI: https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 -// INT96 timestamp layout -// -------------------------- -// | 64 bit | 32 bit | -// --------------------------- -// | nano sec | julian day | -// --------------------------- -// NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, -// where dt is a negative number but still legal in the context of Go. -// But it will cause errors or potential data inconsistency when importing. 
-func int96ToTime(parquetDate []byte) time.Time { - nano := binary.LittleEndian.Uint64(parquetDate[:8]) - dt := binary.LittleEndian.Uint32(parquetDate[8:]) - return jdToTime(int32(dt), int64(nano)) + return parser, nil } diff --git a/pkg/lightning/mydump/parquet_parser_test.go b/pkg/lightning/mydump/parquet_parser_test.go index 2830686c2a29c..d4fcfc34cbe4f 100644 --- a/pkg/lightning/mydump/parquet_parser_test.go +++ b/pkg/lightning/mydump/parquet_parser_test.go @@ -140,7 +140,7 @@ func TestParquetVariousTypes(t *testing.T) { require.NoError(t, err) defer reader.Close() - require.Len(t, reader.columns, 9) + require.Len(t, reader.columnNames, 9) require.NoError(t, reader.ReadRow()) rowValue := []string{ From 364c13437bfa1edbd2eb8e052cfcc111a3ba0765 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 29 Nov 2024 10:22:36 +0800 Subject: [PATCH 02/93] Use custom arrow-go package --- go.mod | 11 ++++++----- go.sum | 8 ++++++++ pkg/lightning/mydump/parquet_parser.go | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 0b0be3555cdd3..2ef01e8c0fae5 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/YangKeao/go-mysql-driver v0.0.0-20240627104025-dd5589458cfa github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 - github.com/apache/arrow-go/v18 v18.0.0 + //github.com/joechenrh/arrow-go/v18 v18.0.0 github.com/apache/skywalking-eyes v0.4.0 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/ashanbrown/makezero v1.1.1 @@ -141,7 +141,7 @@ require ( golang.org/x/time v0.7.0 golang.org/x/tools v0.27.0 google.golang.org/api v0.169.0 - google.golang.org/grpc v1.67.1 + google.golang.org/grpc v1.63.2 gopkg.in/yaml.v2 v2.4.0 gorm.io/driver/mysql v1.5.7 gorm.io/gorm v1.25.11 @@ -163,8 +163,9 @@ require ( github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect + 
github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -306,8 +307,8 @@ require ( golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect - google.golang.org/protobuf v1.35.1 + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/protobuf v1.35.2 gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index ae1c56e2cf826..19750cefea4ff 100644 --- a/go.sum +++ b/go.sum @@ -512,6 +512,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 h1:f4ZzzvqUDuOBE+39uqS7RtlbdJ8nfW1Jm8eePeoVO8Y= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500/go.mod h1:qtkkdMnKrhq4O5anTsgfD5J+XXOwgMXVmmX4qQPvEgQ= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 
v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= @@ -549,6 +551,8 @@ github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= +github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -1355,6 +1359,8 @@ google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1: google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc 
v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -1394,6 +1400,8 @@ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 26af68bd2af24..d306c7d7e6b59 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -29,9 +29,9 @@ import ( "github.com/pingcap/tidb/pkg/types" "github.com/xitongsys/parquet-go/source" - "github.com/apache/arrow-go/v18/parquet" - "github.com/apache/arrow-go/v18/parquet/file" - "github.com/apache/arrow-go/v18/parquet/schema" + "github.com/joechenrh/arrow-go/v18/parquet" + "github.com/joechenrh/arrow-go/v18/parquet/file" + "github.com/joechenrh/arrow-go/v18/parquet/schema" ) const ( From f3222f038dfb687d730ea127e383234ec3549900 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 29 Nov 2024 10:23:36 +0800 Subject: [PATCH 03/93] Update go deps --- go.mod | 5 +++-- go.sum | 12 ++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 2ef01e8c0fae5..6188e4026bd13 100644 --- a/go.mod +++ b/go.mod @@ -141,7 +141,7 @@ require ( golang.org/x/time v0.7.0 golang.org/x/tools v0.27.0 
google.golang.org/api v0.169.0 - google.golang.org/grpc v1.63.2 + google.golang.org/grpc v1.64.1 gopkg.in/yaml.v2 v2.4.0 gorm.io/driver/mysql v1.5.7 gorm.io/gorm v1.25.11 @@ -151,6 +151,8 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) +require github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 + require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect @@ -163,7 +165,6 @@ require ( github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect diff --git a/go.sum b/go.sum index 19750cefea4ff..4c7f85d667872 100644 --- a/go.sum +++ b/go.sum @@ -103,8 +103,6 @@ github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHG github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/arrow-go/v18 v18.0.0 h1:1dBDaSbH3LtulTyOVYaBCHO3yVRwjV+TZaqn3g6V7ZM= -github.com/apache/arrow-go/v18 v18.0.0/go.mod h1:t6+cWRSmKgdQ6HsxisQjok+jBpKGhRDiqcf3p0p/F+A= github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg= github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw= github.com/apache/skywalking-eyes v0.4.0 h1:O13kdRU6FCEZevfD01mdhTgCZLLfPZIQ0GXZrLl7FpQ= @@ -549,8 +547,6 @@ github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/klauspost/cpuid v1.3.1 
h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= -github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -1357,8 +1353,6 @@ google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUE google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= @@ -1379,8 +1373,8 @@ google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTp google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= 
google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= -google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= -google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/grpc v1.64.1 h1:LKtvyfbX3UGVPFcGqJ9ItpVWW6oN/2XqTxfAnwRRXiA= +google.golang.org/grpc v1.64.1/go.mod h1:hiQF4LFZelK2WKaP6W0L92zGHtiQdZxk8CrSdvyjeP0= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -1398,8 +1392,6 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From 0bab0fddf5d96a4bba2395809fae2a5928492249 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 4 Dec 2024 10:34:04 +0800 Subject: [PATCH 04/93] Update reader --- config.toml | 1 + pkg/executor/importer/import.go | 4 +-- pkg/lightning/mydump/parquet_parser.go | 48 +++++++++++++++++++------- 3 files 
changed, 39 insertions(+), 14 deletions(-) create mode 100644 config.toml diff --git a/config.toml b/config.toml new file mode 100644 index 0000000000000..dd450e306bfff --- /dev/null +++ b/config.toml @@ -0,0 +1 @@ +server-version = "99.7.25-TiDB-v6.1" diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 6ef74ba91f0a3..01c4c7e772c69 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -394,8 +394,8 @@ func NewImportPlan(ctx context.Context, userSctx sessionctx.Context, plan *plann lineFieldsInfo := plannercore.LineFieldsInfo{ FieldsTerminatedBy: `,`, FieldsEnclosedBy: `"`, - FieldsEscapedBy: `\`, - LinesStartingBy: ``, + //FieldsEscapedBy: `\`, + LinesStartingBy: ``, // csv_parser will determine it automatically(either '\r' or '\n' or '\r\n') // But user cannot set this to empty explicitly. LinesTerminatedBy: ``, diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index d306c7d7e6b59..aa8e7e7d7826a 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -393,18 +393,11 @@ func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { meta := p.colMetas[i] switch meta.converted { - case schema.ConvertedTypes.BSON: - case schema.ConvertedTypes.JSON: - case schema.ConvertedTypes.UTF8: - case schema.ConvertedTypes.Enum: + case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: p.setStringData(num, i, dataOffset) - case schema.ConvertedTypes.Int8: - case schema.ConvertedTypes.Int16: - case schema.ConvertedTypes.Int32: + case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: p.setInt32Data(num, i, dataOffset) - case schema.ConvertedTypes.Uint8: - case schema.ConvertedTypes.Uint16: - case schema.ConvertedTypes.Uint32: + case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: 
p.setUint32Data(num, i, dataOffset) case schema.ConvertedTypes.Int64: p.setInt64Data(num, i, dataOffset) @@ -632,6 +625,36 @@ func ReadParquetFileRowCountByFile( return numberRows, nil } +type parquetFileOpener struct { + storage.ReadSeekCloser + lastOff int64 + bufSize int + buf []byte +} + +func (pf *parquetFileOpener) InitBuffer(bufSize int) { + pf.bufSize = bufSize + pf.buf = make([]byte, bufSize) +} + +func (pf *parquetFileOpener) ReadAt(p []byte, off int64) (n int, err error) { + // We want to minimize the number of Seek call as much as possible, + // since the underlying reader may require reopening the file. + gap := int(off - pf.lastOff) + if gap < 0 || gap > pf.bufSize { + if _, err := pf.Seek(off, io.SeekStart); err != nil { + return 0, err + } + } else { + pf.buf = pf.buf[:gap] + if _, err := pf.Read(pf.buf); err != nil { + return 0, err + } + } + + return pf.Read(p) +} + // NewParquetParser generates a parquet parser. func NewParquetParser( ctx context.Context, @@ -650,8 +673,9 @@ func NewParquetParser( } } - // TODO(joechenrh): use r - reader, err := file.OpenParquetFile(path, false) + nr := &parquetFileOpener{ReadSeekCloser: r} + nr.InitBuffer(64 * 1024) + reader, err := file.NewParquetReader(nr) if err != nil { return nil, errors.Trace(err) } From 3f155f6d15526cf04192486c66713b4aa2f44b0b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 4 Dec 2024 10:35:58 +0800 Subject: [PATCH 05/93] Fix --- config.toml | 1 - pkg/executor/importer/import.go | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 config.toml diff --git a/config.toml b/config.toml deleted file mode 100644 index dd450e306bfff..0000000000000 --- a/config.toml +++ /dev/null @@ -1 +0,0 @@ -server-version = "99.7.25-TiDB-v6.1" diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 01c4c7e772c69..6ef74ba91f0a3 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -394,8 +394,8 @@ func NewImportPlan(ctx 
context.Context, userSctx sessionctx.Context, plan *plann lineFieldsInfo := plannercore.LineFieldsInfo{ FieldsTerminatedBy: `,`, FieldsEnclosedBy: `"`, - //FieldsEscapedBy: `\`, - LinesStartingBy: ``, + FieldsEscapedBy: `\`, + LinesStartingBy: ``, // csv_parser will determine it automatically(either '\r' or '\n' or '\r\n') // But user cannot set this to empty explicitly. LinesTerminatedBy: ``, From ab23cd2e1df715328da02be6b450d9e4c10034e3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 9 Dec 2024 19:19:34 +0800 Subject: [PATCH 06/93] Update code --- pkg/lightning/mydump/parquet_parser.go | 338 ++++++++++++++----------- 1 file changed, 196 insertions(+), 142 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index aa8e7e7d7826a..84f92b4535fa6 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -27,7 +27,6 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/types" - "github.com/xitongsys/parquet-go/source" "github.com/joechenrh/arrow-go/v18/parquet" "github.com/joechenrh/arrow-go/v18/parquet/file" @@ -40,6 +39,7 @@ const ( // if a parquet if small than this threshold, parquet will load the whole file in a byte slice to // optimize the read performance smallParquetFileThreshold = 256 * 1024 * 1024 + defaultBufSize = 64 * 1024 // jan011970 is the date of unix epoch in julian day, jan011970 = 2440588 secPerDay = 24 * 60 * 60 @@ -61,6 +61,17 @@ type readBuffer struct { boolBuffer []bool } +func (rb *readBuffer) Init(size int) { + rb.fixedLenArrayBuffer = make([]parquet.FixedLenByteArray, size) + rb.float32Buffer = make([]float32, size) + rb.float64Buffer = make([]float64, size) + rb.byteArrayBuffer = make([]parquet.ByteArray, size) + rb.int32Buffer = make([]int32, size) + rb.int64Buffer = make([]int64, size) + rb.int96Buffer = make([]parquet.Int96, size) + rb.boolBuffer = make([]bool, size) 
+} + // convertedType is older representation of the logical type in parquet // ref: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md type convertedType struct { @@ -116,6 +127,104 @@ func formatTime(v int64, unit string, format, utcFormat string) string { return t.UTC().Format(utcFormat) } +// bytesReaderWrapper is a wrapper of bytes.Reader. +type bytesReaderWrapper struct { + *bytes.Reader + rawBytes []byte + // current file path + path string +} + +func (*bytesReaderWrapper) Close() error { + return nil +} + +func (*bytesReaderWrapper) Write(_ []byte) (n int, err error) { + return 0, errors.New("unsupported operation") +} + +func (r *bytesReaderWrapper) Open(name string) (storage.ReadSeekCloser, error) { + if len(name) > 0 && name != r.path { + panic(fmt.Sprintf("Open with a different name is not supported! current: '%s', new: '%s'", r.path, name)) + } + return &bytesReaderWrapper{ + Reader: bytes.NewReader(r.rawBytes), + rawBytes: r.rawBytes, + path: r.path, + }, nil +} + +// parquetFileWrapper is a wrapper for storage.ReadSeekCloser +// It implements io.ReaderAt interface to read parquet file using arrow-go. +type parquetFileWrapper struct { + ctx context.Context + + storage.ReadSeekCloser + lastOff int64 + bufSize int + buf []byte + + // current file path and store, used to open file + store storage.ExternalStorage + path string +} + +func (pf *parquetFileWrapper) InitBuffer(bufSize int) { + pf.bufSize = bufSize + pf.buf = make([]byte, bufSize) +} + +// ReadAt implemement ReaderAt interface +func (pf *parquetFileWrapper) ReadAt(p []byte, off int64) (int, error) { + // We want to minimize the number of Seek call as much as possible, + // since the underlying reader may require reopening the file. 
+ gap := int(off - pf.lastOff) + if gap < 0 || gap > pf.bufSize { + if _, err := pf.Seek(off, io.SeekStart); err != nil { + return 0, err + } + } else { + pf.buf = pf.buf[:gap] + if _, err := pf.Read(pf.buf); err != nil { + return 0, err + } + } + + n, err := pf.Read(p) + pf.lastOff = off + int64(n) + return n, err +} + +// Seek implemement Seeker interface +func (pf *parquetFileWrapper) Seek(offset int64, whence int) (int64, error) { + newOffset, err := pf.ReadSeekCloser.Seek(offset, whence) + pf.lastOff = newOffset + return newOffset, err +} + +func (*parquetFileWrapper) Write(_ []byte) (n int, err error) { + return 0, errors.New("unsupported operation") +} + +func (pf *parquetFileWrapper) Open(name string) (storage.ReadSeekCloser, error) { + if len(name) == 0 { + name = pf.path + } + reader, err := pf.store.Open(pf.ctx, name, nil) + if err != nil { + return nil, errors.Trace(err) + } + + newPf := &parquetFileWrapper{ + ReadSeekCloser: reader, + store: pf.store, + ctx: pf.ctx, + path: name, + } + newPf.InitBuffer(64 * 1024) + return newPf, nil +} + // ParquetParser parses a parquet file for import // It implements the Parser interface. 
type ParquetParser struct { @@ -124,7 +233,7 @@ type ParquetParser struct { columnNames []string colReaders []file.ColumnChunkReader - colBuffers []readBuffer + colBuffers []*readBuffer rows [][]types.Datum curIdx int avail int @@ -137,10 +246,9 @@ type ParquetParser struct { curRows int totalRows int - lastRow Row - logger log.Logger - - readSeekCloser ReadSeekCloser + readBytes int64 + lastRow Row + logger log.Logger } func (p *ParquetParser) setStringData(readNum, col, offset int) { @@ -223,7 +331,7 @@ func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { decimal := p.colMetas[col].decimalMeta for i := 0; i < readNum; i++ { - if colTp == parquet.Types.Int32 || colTp == parquet.Types.Int32 { + if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { v := p.colBuffers[col].int64Buffer[i] if colTp == parquet.Types.Int32 { v = int64(p.colBuffers[col].int32Buffer[i]) @@ -261,6 +369,34 @@ func (p *ParquetParser) setBoolData(readNum, col, offset int) { } } +func (p *ParquetParser) setFloat32Data(readNum, col, offset int) { + buf := p.colBuffers[col].float32Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetFloat32(buf[i]) + } +} + +func (p *ParquetParser) setFloat64Data(readNum, col, offset int) { + buf := p.colBuffers[col].float64Buffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetFloat64(buf[i]) + } +} + +func (p *ParquetParser) setFixedByteArrayData(readNum, col, offset int) { + buf := p.colBuffers[col].fixedLenArrayBuffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") + } +} + +func (p *ParquetParser) setByteArrayData(readNum, col, offset int) { + buf := p.colBuffers[col].byteArrayBuffer + for i := 0; i < readNum; i++ { + p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") + } +} + func (p *ParquetParser) setInt96Data(readNum, col, offset int) { // FYI: 
https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 // INT96 timestamp layout @@ -285,11 +421,15 @@ func (p *ParquetParser) Init() error { numCols := p.reader.MetaData().Schema.NumColumns() p.colReaders = make([]file.ColumnChunkReader, numCols) - p.colBuffers = make([]readBuffer, numCols) + p.colBuffers = make([]*readBuffer, numCols) p.rows = make([][]types.Datum, batchReadRowSize) for i := range p.rows { p.rows[i] = make([]types.Datum, numCols) } + for i := range p.colBuffers { + p.colBuffers[i] = &readBuffer{} + p.colBuffers[i].Init(batchReadRowSize) + } return nil } @@ -358,7 +498,8 @@ func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { req := int64(num) for i, col := range p.colReaders { buf := p.colBuffers[i] - switch col.Type() { + physicalTp := col.Type() + switch physicalTp { case parquet.Types.FixedLenByteArray: total, _, err = col.(*file.FixedLenByteArrayColumnChunkReader).ReadBatch(req, buf.fixedLenArrayBuffer, nil, nil) case parquet.Types.Float: @@ -381,17 +522,31 @@ func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { return 0, errors.Trace(err) } - // Parse data according to converted type - if col.Type() == parquet.Types.Boolean { - p.setBoolData(num, i, dataOffset) - continue - } else if col.Type() == parquet.Types.Int96 { - p.setInt96Data(num, i, dataOffset) + meta := p.colMetas[i] + + // If we can't get converted type, just use physical type + if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { + switch physicalTp { + case parquet.Types.Boolean: + p.setBoolData(num, i, dataOffset) + case parquet.Types.Int32: + p.setInt32Data(num, i, dataOffset) + case parquet.Types.Int64: + p.setInt64Data(num, i, dataOffset) + case parquet.Types.Int96: + p.setInt96Data(num, i, dataOffset) + case parquet.Types.Float: + 
p.setFloat32Data(num, i, dataOffset) + case parquet.Types.Double: + p.setFloat64Data(num, i, dataOffset) + case parquet.Types.ByteArray: + p.setByteArrayData(num, i, dataOffset) + case parquet.Types.FixedLenByteArray: + p.setFixedByteArrayData(num, i, dataOffset) + } continue } - meta := p.colMetas[i] - switch meta.converted { case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: p.setStringData(num, i, dataOffset) @@ -426,8 +581,9 @@ func (p *ParquetParser) Pos() (pos int64, rowID int64) { return int64(p.curRows), p.lastRow.RowID } -// SetPos sets the position in a parquet file. -// It implements the Parser interface. +// SetPos implements the Parser interface. +// For parquet file, this interface will read and discard the first `pos` rows, +// and set the current row ID to `rowID` func (p *ParquetParser) SetPos(pos int64, rowID int64) error { p.lastRow.RowID = rowID if pos < int64(p.curRows) { @@ -440,9 +596,10 @@ func (p *ParquetParser) SetPos(pos int64, rowID int64) error { } // ScannedPos implements the Parser interface. -// For parquet it's parquet file's reader current position. +// For parquet it's nonsense to read the position of internal reader, +// thus it will return the number of rows read func (pp *ParquetParser) ScannedPos() (int64, error) { - return pp.readSeekCloser.Seek(0, io.SeekCurrent) + return int64(pp.curRows), nil } // Close closes the parquet file of the parser. 
@@ -501,77 +658,13 @@ func (pp *ParquetParser) SetRowID(rowID int64) { pp.lastRow.RowID = rowID } -// readerWrapper is a used for implement `source.ParquetFile` -type readerWrapper struct { - ReadSeekCloser - store storage.ExternalStorage - ctx context.Context - // current file path - path string -} - -func (*readerWrapper) Write(_ []byte) (n int, err error) { - return 0, errors.New("unsupported operation") -} - -func (r *readerWrapper) Open(name string) (source.ParquetFile, error) { - if len(name) == 0 { - name = r.path - } - reader, err := r.store.Open(r.ctx, name, nil) - if err != nil { - return nil, errors.Trace(err) - } - return &readerWrapper{ - ReadSeekCloser: reader, - store: r.store, - ctx: r.ctx, - path: name, - }, nil -} - -func (*readerWrapper) Create(_ string) (source.ParquetFile, error) { - return nil, errors.New("unsupported operation") -} - -// bytesReaderWrapper is a wrapper of bytes.Reader used for implement `source.ParquetFile` -type bytesReaderWrapper struct { - *bytes.Reader - rawBytes []byte - // current file path - path string -} - -func (*bytesReaderWrapper) Close() error { - return nil -} - -func (*bytesReaderWrapper) Create(_ string) (source.ParquetFile, error) { - return nil, errors.New("unsupported operation") -} - -func (*bytesReaderWrapper) Write(_ []byte) (n int, err error) { - return 0, errors.New("unsupported operation") -} - -func (r *bytesReaderWrapper) Open(name string) (source.ParquetFile, error) { - if len(name) > 0 && name != r.path { - panic(fmt.Sprintf("Open with a different name is not supported! current: '%s', new: '%s'", r.path, name)) - } - return &bytesReaderWrapper{ - Reader: bytes.NewReader(r.rawBytes), - rawBytes: r.rawBytes, - path: r.path, - }, nil -} - // OpenParquetReader opens a parquet file and returns a handle that can at least read the file. 
func OpenParquetReader( ctx context.Context, store storage.ExternalStorage, path string, size int64, -) (source.ParquetFile, error) { +) (storage.ReadSeekCloser, error) { if size <= smallParquetFileThreshold { fileBytes, err := store.ReadFile(ctx, path) if err != nil { @@ -588,24 +681,15 @@ func OpenParquetReader( if err != nil { return nil, err } - return &readerWrapper{ + + pf := &parquetFileWrapper{ ReadSeekCloser: r, store: store, ctx: ctx, path: path, - }, nil -} - -// readParquetFileRowCount reads the parquet file row count. -// It is a special func to fetch parquet file row count fast. -// TODO(joechnerh): implement this -func readParquetFileRowCount( - ctx context.Context, - store storage.ExternalStorage, - r storage.ReadSeekCloser, - path string, -) (int64, error) { - return 0, nil + } + pf.InitBuffer(defaultBufSize) + return pf, nil } // ReadParquetFileRowCountByFile reads the parquet file row count through fileMeta. @@ -618,41 +702,13 @@ func ReadParquetFileRowCountByFile( if err != nil { return 0, errors.Trace(err) } - numberRows, err := readParquetFileRowCount(ctx, store, r, fileMeta.Path) + + reader, err := file.NewParquetReader(&parquetFileWrapper{ReadSeekCloser: r}) if err != nil { return 0, errors.Trace(err) } - return numberRows, nil -} -type parquetFileOpener struct { - storage.ReadSeekCloser - lastOff int64 - bufSize int - buf []byte -} - -func (pf *parquetFileOpener) InitBuffer(bufSize int) { - pf.bufSize = bufSize - pf.buf = make([]byte, bufSize) -} - -func (pf *parquetFileOpener) ReadAt(p []byte, off int64) (n int, err error) { - // We want to minimize the number of Seek call as much as possible, - // since the underlying reader may require reopening the file. 
- gap := int(off - pf.lastOff) - if gap < 0 || gap > pf.bufSize { - if _, err := pf.Seek(off, io.SeekStart); err != nil { - return 0, err - } - } else { - pf.buf = pf.buf[:gap] - if _, err := pf.Read(pf.buf); err != nil { - return 0, err - } - } - - return pf.Read(p) + return reader.MetaData().NumRows, nil } // NewParquetParser generates a parquet parser. @@ -662,20 +718,19 @@ func NewParquetParser( r storage.ReadSeekCloser, path string, ) (*ParquetParser, error) { - // check to avoid wrapping twice - wrapper, ok := r.(source.ParquetFile) + + wrapper, ok := r.(*parquetFileWrapper) if !ok { - wrapper = &readerWrapper{ + wrapper := &parquetFileWrapper{ ReadSeekCloser: r, store: store, ctx: ctx, path: path, } + wrapper.InitBuffer(64 * 1024) } - nr := &parquetFileOpener{ReadSeekCloser: r} - nr.InitBuffer(64 * 1024) - reader, err := file.NewParquetReader(nr) + reader, err := file.NewParquetReader(wrapper) if err != nil { return nil, errors.Trace(err) } @@ -698,11 +753,10 @@ func NewParquetParser( } parser := &ParquetParser{ - reader: reader, - colMetas: columnMetas, - columnNames: columnNames, - logger: log.FromContext(ctx), - readSeekCloser: wrapper, + reader: reader, + colMetas: columnMetas, + columnNames: columnNames, + logger: log.FromContext(ctx), } parser.Init() From 36b295c73815659891d6c1e28580c24018d7ac69 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 10 Dec 2024 14:23:27 +0800 Subject: [PATCH 07/93] Refine --- pkg/lightning/mydump/parquet_parser.go | 96 ++++++++++++++------------ 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 84f92b4535fa6..e042e44e1265c 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -40,9 +40,6 @@ const ( // optimize the read performance smallParquetFileThreshold = 256 * 1024 * 1024 defaultBufSize = 64 * 1024 - // jan011970 is the date of unix epoch in julian day, - jan011970 = 
2440588 - secPerDay = 24 * 60 * 60 utcTimeLayout = "2006-01-02 15:04:05.999999Z" timeLayout = "2006-01-02 15:04:05.999999" @@ -221,7 +218,7 @@ func (pf *parquetFileWrapper) Open(name string) (storage.ReadSeekCloser, error) ctx: pf.ctx, path: name, } - newPf.InitBuffer(64 * 1024) + newPf.InitBuffer(defaultBufSize) return newPf, nil } @@ -414,6 +411,7 @@ func (p *ParquetParser) setInt96Data(readNum, col, offset int) { } } +// Init initializes the Parquet parser and allocate necessary buffers func (p *ParquetParser) Init() error { p.curRowGroup, p.totalRowGroup = -1, p.reader.NumRowGroups() @@ -434,23 +432,7 @@ func (p *ParquetParser) Init() error { return nil } -func (p *ParquetParser) GetRow() ([]types.Datum, error) { - if p.curIdx >= p.avail { - read, err := p.readRows(batchReadRowSize) - if err != nil { - return nil, errors.Trace(err) - } - if read == 0 { - return nil, nil - } - p.curIdx, p.avail = 0, read - } - - row := p.rows[p.curIdx] - p.curIdx++ - return row, nil -} - +// readRows read several rows internally and store them in the row buffer. func (p *ParquetParser) readRows(num int) (int, error) { readNum := min(num, p.totalRows-p.curRows) if readNum == 0 { @@ -487,8 +469,10 @@ func (p *ParquetParser) readRows(num int) (int, error) { return readNum, nil } -// Read num rows in current row group and store results -func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { +// readInGroup read severals rows in current row group. +// storeOffset represents the starting position for storing the read rows. +// It's a part of the readRows. 
+func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { var ( err error total int64 @@ -528,48 +512,48 @@ func (p *ParquetParser) readInGroup(num, dataOffset int) (int, error) { if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { switch physicalTp { case parquet.Types.Boolean: - p.setBoolData(num, i, dataOffset) + p.setBoolData(num, i, storeOffset) case parquet.Types.Int32: - p.setInt32Data(num, i, dataOffset) + p.setInt32Data(num, i, storeOffset) case parquet.Types.Int64: - p.setInt64Data(num, i, dataOffset) + p.setInt64Data(num, i, storeOffset) case parquet.Types.Int96: - p.setInt96Data(num, i, dataOffset) + p.setInt96Data(num, i, storeOffset) case parquet.Types.Float: - p.setFloat32Data(num, i, dataOffset) + p.setFloat32Data(num, i, storeOffset) case parquet.Types.Double: - p.setFloat64Data(num, i, dataOffset) + p.setFloat64Data(num, i, storeOffset) case parquet.Types.ByteArray: - p.setByteArrayData(num, i, dataOffset) + p.setByteArrayData(num, i, storeOffset) case parquet.Types.FixedLenByteArray: - p.setFixedByteArrayData(num, i, dataOffset) + p.setFixedByteArrayData(num, i, storeOffset) } continue } switch meta.converted { case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: - p.setStringData(num, i, dataOffset) + p.setStringData(num, i, storeOffset) case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: - p.setInt32Data(num, i, dataOffset) + p.setInt32Data(num, i, storeOffset) case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: - p.setUint32Data(num, i, dataOffset) + p.setUint32Data(num, i, storeOffset) case schema.ConvertedTypes.Int64: - p.setInt64Data(num, i, dataOffset) + p.setInt64Data(num, i, storeOffset) case schema.ConvertedTypes.Uint64: - p.setUint64Data(num, i, dataOffset) + p.setUint64Data(num, i, storeOffset) case 
schema.ConvertedTypes.TimeMillis: - p.setTimeMillisData(num, i, dataOffset) + p.setTimeMillisData(num, i, storeOffset) case schema.ConvertedTypes.TimeMicros: - p.setTimeMicrosData(num, i, dataOffset) + p.setTimeMicrosData(num, i, storeOffset) case schema.ConvertedTypes.TimestampMillis: - p.setTimestampMillisData(num, i, dataOffset) + p.setTimestampMillisData(num, i, storeOffset) case schema.ConvertedTypes.TimestampMicros: - p.setTimestampMicrosData(num, i, dataOffset) + p.setTimestampMicrosData(num, i, storeOffset) case schema.ConvertedTypes.Date: - p.setDateData(num, i, dataOffset) + p.setDateData(num, i, storeOffset) case schema.ConvertedTypes.Decimal: - p.setDecimalData(num, i, dataOffset) + p.setDecimalData(num, i, storeOffset) } } @@ -596,8 +580,8 @@ func (p *ParquetParser) SetPos(pos int64, rowID int64) error { } // ScannedPos implements the Parser interface. -// For parquet it's nonsense to read the position of internal reader, -// thus it will return the number of rows read +// For parquet it's nonsense to get the position of internal reader, +// thus it will return the number of rows read. func (pp *ParquetParser) ScannedPos() (int64, error) { return int64(pp.curRows), nil } @@ -608,8 +592,29 @@ func (pp *ParquetParser) Close() error { return pp.reader.Close() } +// GetRow get the the current row. +// Return error if can't read next row. +// User should call ReadRow before calling this. +func (p *ParquetParser) GetRow() ([]types.Datum, error) { + if p.curIdx >= p.avail { + read, err := p.readRows(batchReadRowSize) + if err != nil { + return nil, errors.Trace(err) + } + if read == 0 { + return nil, nil + } + p.curIdx, p.avail = 0, read + } + + row := p.rows[p.curIdx] + p.curIdx++ + return row, nil +} + // ReadRow reads a row in the parquet file by the parser. // It implements the Parser interface. +// Return io.EOF if reaching the end of the file. 
func (p *ParquetParser) ReadRow() error { p.lastRow.RowID++ p.lastRow.Length = 0 @@ -718,7 +723,6 @@ func NewParquetParser( r storage.ReadSeekCloser, path string, ) (*ParquetParser, error) { - wrapper, ok := r.(*parquetFileWrapper) if !ok { wrapper := &parquetFileWrapper{ @@ -727,7 +731,7 @@ func NewParquetParser( ctx: ctx, path: path, } - wrapper.InitBuffer(64 * 1024) + wrapper.InitBuffer(defaultBufSize) } reader, err := file.NewParquetReader(wrapper) From 14284da86bdd0066cd0def722b02cb8d8a37ed7b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 10 Dec 2024 15:27:43 +0800 Subject: [PATCH 08/93] Refine --- pkg/lightning/mydump/parquet_parser.go | 80 +++++++++++++++++++------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index e042e44e1265c..af3ca127d6e0c 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -110,7 +110,7 @@ func binaryToDecimalStr(rawBytes []byte, scale int) string { return res.String() } -func formatTime(v int64, unit string, format, utcFormat string) string { +func formatTime(v int64, unit string, format, utcFormat string, utc bool) string { var t time.Time switch unit { case "MICROS": @@ -121,7 +121,11 @@ func formatTime(v int64, unit string, format, utcFormat string) string { t = time.Unix(0, v) } - return t.UTC().Format(utcFormat) + t = t.UTC() + if utc { + return t.Format(utcFormat) + } + return t.Format(format) } // bytesReaderWrapper is a wrapper of bytes.Reader. @@ -140,7 +144,7 @@ func (*bytesReaderWrapper) Write(_ []byte) (n int, err error) { return 0, errors.New("unsupported operation") } -func (r *bytesReaderWrapper) Open(name string) (storage.ReadSeekCloser, error) { +func (r *bytesReaderWrapper) Open(name string) (parquet.ReaderAtSeeker, error) { if len(name) > 0 && name != r.path { panic(fmt.Sprintf("Open with a different name is not supported! 
current: '%s', new: '%s'", r.path, name)) } @@ -203,7 +207,7 @@ func (*parquetFileWrapper) Write(_ []byte) (n int, err error) { return 0, errors.New("unsupported operation") } -func (pf *parquetFileWrapper) Open(name string) (storage.ReadSeekCloser, error) { +func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) { if len(name) == 0 { name = pf.path } @@ -225,7 +229,7 @@ func (pf *parquetFileWrapper) Open(name string) (storage.ReadSeekCloser, error) // ParquetParser parses a parquet file for import // It implements the Parser interface. type ParquetParser struct { - reader *file.Reader + readers []*file.Reader colMetas []convertedType columnNames []string @@ -243,9 +247,8 @@ type ParquetParser struct { curRows int totalRows int - readBytes int64 - lastRow Row - logger log.Logger + lastRow Row + logger log.Logger } func (p *ParquetParser) setStringData(readNum, col, offset int) { @@ -286,7 +289,7 @@ func (p *ParquetParser) setUint64Data(readNum, col, offset int) { func (p *ParquetParser) setTimeMillisData(readNum, col, offset int) { buf := p.colBuffers[col].int32Buffer for i := 0; i < readNum; i++ { - timeStr := formatTime(int64(buf[i]), "MILLIS", "15:04:05.999999", "15:04:05.999999Z") + timeStr := formatTime(int64(buf[i]), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } @@ -294,7 +297,7 @@ func (p *ParquetParser) setTimeMillisData(readNum, col, offset int) { func (p *ParquetParser) setTimeMicrosData(readNum, col, offset int) { buf := p.colBuffers[col].int32Buffer for i := 0; i < readNum; i++ { - timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z") + timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } @@ -302,7 +305,7 @@ func (p *ParquetParser) setTimeMicrosData(readNum, col, offset int) { func (p *ParquetParser) setTimestampMillisData(readNum, 
col, offset int) { buf := p.colBuffers[col].int64Buffer for i := 0; i < readNum; i++ { - timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout) + timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout, true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } @@ -310,7 +313,7 @@ func (p *ParquetParser) setTimestampMillisData(readNum, col, offset int) { func (p *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { buf := p.colBuffers[col].int64Buffer for i := 0; i < readNum; i++ { - timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout) + timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout, true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } @@ -413,11 +416,11 @@ func (p *ParquetParser) setInt96Data(readNum, col, offset int) { // Init initializes the Parquet parser and allocate necessary buffers func (p *ParquetParser) Init() error { - p.curRowGroup, p.totalRowGroup = -1, p.reader.NumRowGroups() + p.curRowGroup, p.totalRowGroup = -1, p.readers[0].NumRowGroups() - p.totalRows = int(p.reader.MetaData().NumRows) + p.totalRows = int(p.readers[0].MetaData().NumRows) - numCols := p.reader.MetaData().Schema.NumColumns() + numCols := p.readers[0].MetaData().Schema.NumColumns() p.colReaders = make([]file.ColumnChunkReader, numCols) p.colBuffers = make([]*readBuffer, numCols) p.rows = make([][]types.Datum, batchReadRowSize) @@ -444,15 +447,15 @@ func (p *ParquetParser) readRows(num int) (int, error) { // Move to next row group if p.curRowInGroup == p.totalRowsInGroup { p.curRowGroup++ - rowGroupReader := p.reader.RowGroup(p.curRowGroup) var err error for c := 0; c < len(p.colReaders); c++ { + rowGroupReader := p.readers[c].RowGroup(p.curRowGroup) p.colReaders[c], err = rowGroupReader.Column(c) if err != nil { return 0, errors.Trace(err) } } - p.curRowInGroup, p.totalRowsInGroup = 0, int(rowGroupReader.NumRows()) + p.curRowInGroup, p.totalRowsInGroup = 0, 
int(p.readers[0].MetaData().RowGroups[p.curRowGroup].NumRows) } // Read in this group @@ -589,7 +592,12 @@ func (pp *ParquetParser) ScannedPos() (int64, error) { // Close closes the parquet file of the parser. // It implements the Parser interface. func (pp *ParquetParser) Close() error { - return pp.reader.Close() + for _, r := range pp.readers { + if err := r.Close(); err != nil { + return errors.Trace(err) + } + } + return nil } // GetRow get the the current row. @@ -633,6 +641,18 @@ func (p *ParquetParser) ReadRow() error { // LastRow gets the last row parsed by the parser. // It implements the Parser interface. func (pp *ParquetParser) LastRow() Row { + pp.lastRow.Length = 0 + for _, v := range pp.lastRow.Row { + if v.IsNull() { + continue + } + if v.Kind() == types.KindString { + // use GetBytes to avoid memory allocation + pp.lastRow.Length += len(v.GetBytes()) + } else { + pp.lastRow.Length += 8 + } + } return pp.lastRow } @@ -725,7 +745,7 @@ func NewParquetParser( ) (*ParquetParser, error) { wrapper, ok := r.(*parquetFileWrapper) if !ok { - wrapper := &parquetFileWrapper{ + wrapper = &parquetFileWrapper{ ReadSeekCloser: r, store: store, ctx: ctx, @@ -734,7 +754,10 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } - reader, err := file.NewParquetReader(wrapper) + prop := parquet.NewReaderProperties(nil) + prop.BufferedStreamEnabled = true + + reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) if err != nil { return nil, errors.Trace(err) } @@ -756,8 +779,23 @@ func NewParquetParser( } } + subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) + subreaders = append(subreaders, reader) + for i := 1; i < fileSchema.NumColumns(); i++ { + newWrapper, err := wrapper.Open("") + if err != nil { + return nil, errors.Trace(err) + } + reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + if err != nil { + return nil, errors.Trace(err) + } + + subreaders = 
append(subreaders, reader) + } + parser := &ParquetParser{ - reader: reader, + readers: subreaders, colMetas: columnMetas, columnNames: columnNames, logger: log.FromContext(ctx), From 010baa40801851386edde4ee81315173c45fd4ad Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 11 Dec 2024 15:35:47 +0800 Subject: [PATCH 09/93] Fix --- pkg/lightning/mydump/parquet_parser.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index af3ca127d6e0c..bb7fdedc8ee68 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -232,12 +232,18 @@ type ParquetParser struct { readers []*file.Reader colMetas []convertedType columnNames []string + readType int colReaders []file.ColumnChunkReader + + // colBuffers is used to store raw data read from parquet columns. + // rows stores the actual data after parsing. colBuffers []*readBuffer rows [][]types.Datum - curIdx int - avail int + + // curIdx and avail is the current index and total number of rows in rows buffer + curIdx int + avail int curRowGroup int totalRowGroup int @@ -565,7 +571,7 @@ func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { // Pos returns the currently row number of the parquet file func (p *ParquetParser) Pos() (pos int64, rowID int64) { - return int64(p.curRows), p.lastRow.RowID + return int64(p.curRows - p.avail + p.curIdx), p.lastRow.RowID } // SetPos implements the Parser interface. 
From 0e20195855db6a49f1972cdec6ebcd9eebd62b8c Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 13 Dec 2024 15:26:51 +0800 Subject: [PATCH 10/93] Add todo --- pkg/lightning/mydump/parquet_parser.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index bb7fdedc8ee68..7ddf809471769 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -788,15 +788,15 @@ func NewParquetParser( subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) subreaders = append(subreaders, reader) for i := 1; i < fileSchema.NumColumns(); i++ { - newWrapper, err := wrapper.Open("") - if err != nil { - return nil, errors.Trace(err) - } - reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) - if err != nil { - return nil, errors.Trace(err) - } - + // TODO(joechenrh): fix memory usage later + // newWrapper, err := wrapper.Open("") + // if err != nil { + // return nil, errors.Trace(err) + // } + // reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + // if err != nil { + // return nil, errors.Trace(err) + // } subreaders = append(subreaders, reader) } From 8216f06d4e9f153213c112cd037d36e47990cc1e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 16 Dec 2024 12:58:48 +0800 Subject: [PATCH 11/93] test --- pkg/lightning/mydump/parquet_parser.go | 229 ++++++++++++++----------- 1 file changed, 133 insertions(+), 96 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 7ddf809471769..18ed8857b04f6 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -34,7 +34,7 @@ import ( ) const ( - batchReadRowSize = 32 + defaultBatchSize = 128 // if a parquet if small than this threshold, parquet will load the whole file 
in a byte slice to // optimize the read performance @@ -45,28 +45,91 @@ const ( timeLayout = "2006-01-02 15:04:05.999999" ) -// Buffers to store data read from columns. -// Declare here to avoid frequent allocation. -type readBuffer struct { - fixedLenArrayBuffer []parquet.FixedLenByteArray - float32Buffer []float32 - float64Buffer []float64 - byteArrayBuffer []parquet.ByteArray - int32Buffer []int32 - int64Buffer []int64 - int96Buffer []parquet.Int96 - boolBuffer []bool -} - -func (rb *readBuffer) Init(size int) { - rb.fixedLenArrayBuffer = make([]parquet.FixedLenByteArray, size) - rb.float32Buffer = make([]float32, size) - rb.float64Buffer = make([]float64, size) - rb.byteArrayBuffer = make([]parquet.ByteArray, size) - rb.int32Buffer = make([]int32, size) - rb.int64Buffer = make([]int64, size) - rb.int96Buffer = make([]parquet.Int96, size) - rb.boolBuffer = make([]bool, size) +type Dumper struct { + reader file.ColumnChunkReader + batchSize int64 + valueOffset int + valuesBuffered int + + levelOffset int64 + levelsBuffered int64 + defLevels []int16 + repLevels []int16 + + valueBuffer interface{} +} + +func createDumper(tp parquet.Type) *Dumper { + batchSize := 128 + + var valueBuffer interface{} + switch tp { + case parquet.Types.Boolean: + valueBuffer = make([]bool, batchSize) + case parquet.Types.Int32: + valueBuffer = make([]int32, batchSize) + case parquet.Types.Int64: + valueBuffer = make([]int64, batchSize) + case parquet.Types.Float: + valueBuffer = make([]float32, batchSize) + case parquet.Types.Double: + valueBuffer = make([]float64, batchSize) + case parquet.Types.Int96: + valueBuffer = make([]parquet.Int96, batchSize) + case parquet.Types.ByteArray: + valueBuffer = make([]parquet.ByteArray, batchSize) + case parquet.Types.FixedLenByteArray: + valueBuffer = make([]parquet.FixedLenByteArray, batchSize) + } + + return &Dumper{ + batchSize: int64(batchSize), + defLevels: make([]int16, batchSize), + repLevels: make([]int16, batchSize), + valueBuffer: 
valueBuffer, + } +} + +func (dump *Dumper) Type() parquet.Type { + return dump.reader.Type() +} + +func (dump *Dumper) SetReader(colReader file.ColumnChunkReader) { + dump.reader = colReader + dump.valueOffset = 0 + dump.levelOffset = 0 +} + +func (dump *Dumper) readNextBatch() { + switch reader := dump.reader.(type) { + case *file.BooleanColumnChunkReader: + values := dump.valueBuffer.([]bool) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.Int32ColumnChunkReader: + values := dump.valueBuffer.([]int32) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.Int64ColumnChunkReader: + values := dump.valueBuffer.([]int64) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.Float32ColumnChunkReader: + values := dump.valueBuffer.([]float32) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.Float64ColumnChunkReader: + values := dump.valueBuffer.([]float64) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.Int96ColumnChunkReader: + values := dump.valueBuffer.([]parquet.Int96) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.ByteArrayColumnChunkReader: + values := dump.valueBuffer.([]parquet.ByteArray) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + case *file.FixedLenByteArrayColumnChunkReader: + values := dump.valueBuffer.([]parquet.FixedLenByteArray) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + } + + dump.valueOffset = 0 + 
dump.levelOffset = 0 } // convertedType is older representation of the logical type in parquet @@ -234,12 +297,10 @@ type ParquetParser struct { columnNames []string readType int - colReaders []file.ColumnChunkReader - // colBuffers is used to store raw data read from parquet columns. // rows stores the actual data after parsing. - colBuffers []*readBuffer - rows [][]types.Datum + dumpers []*Dumper + rows [][]types.Datum // curIdx and avail is the current index and total number of rows in rows buffer curIdx int @@ -258,42 +319,42 @@ type ParquetParser struct { } func (p *ParquetParser) setStringData(readNum, col, offset int) { - buf := p.colBuffers[col].byteArrayBuffer + buf := p.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetString(buf[i].String(), "utf8mb4_bin") } } func (p *ParquetParser) setInt32Data(readNum, col, offset int) { - buf := p.colBuffers[col].int32Buffer + buf := p.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetInt64(int64(buf[i])) } } func (p *ParquetParser) setUint32Data(readNum, col, offset int) { - buf := p.colBuffers[col].int32Buffer + buf := p.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetUint64(uint64(buf[i])) } } func (p *ParquetParser) setInt64Data(readNum, col, offset int) { - buf := p.colBuffers[col].int64Buffer + buf := p.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetInt64(int64(buf[i])) } } func (p *ParquetParser) setUint64Data(readNum, col, offset int) { - buf := p.colBuffers[col].int64Buffer + buf := p.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetUint64(uint64(buf[i])) } } func (p *ParquetParser) setTimeMillisData(readNum, col, offset int) { - buf := p.colBuffers[col].int32Buffer + buf := p.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { timeStr := formatTime(int64(buf[i]), "MILLIS", 
"15:04:05.999999", "15:04:05.999999Z", true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") @@ -301,7 +362,7 @@ func (p *ParquetParser) setTimeMillisData(readNum, col, offset int) { } func (p *ParquetParser) setTimeMicrosData(readNum, col, offset int) { - buf := p.colBuffers[col].int32Buffer + buf := p.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") @@ -309,7 +370,7 @@ func (p *ParquetParser) setTimeMicrosData(readNum, col, offset int) { } func (p *ParquetParser) setTimestampMillisData(readNum, col, offset int) { - buf := p.colBuffers[col].int64Buffer + buf := p.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout, true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") @@ -317,7 +378,7 @@ func (p *ParquetParser) setTimestampMillisData(readNum, col, offset int) { } func (p *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { - buf := p.colBuffers[col].int64Buffer + buf := p.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout, true) p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") @@ -325,7 +386,7 @@ func (p *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { } func (p *ParquetParser) setDateData(readNum, col, offset int) { - buf := p.colBuffers[col].int32Buffer + buf := p.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { dateStr := time.Unix(int64(buf[i])*86400, 0).Format(time.DateOnly) p.rows[offset+i][col].SetString(dateStr, "utf8mb4_bin") @@ -333,14 +394,14 @@ func (p *ParquetParser) setDateData(readNum, col, offset int) { } func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { - colTp := p.colReaders[col].Type() + colTp := p.dumpers[col].Type() decimal := 
p.colMetas[col].decimalMeta for i := 0; i < readNum; i++ { if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { - v := p.colBuffers[col].int64Buffer[i] + v := p.dumpers[col].valueBuffer.([]int64)[i] if colTp == parquet.Types.Int32 { - v = int64(p.colBuffers[col].int32Buffer[i]) + v = int64(p.dumpers[col].valueBuffer.([]int32)[i]) } if !decimal.IsSet || decimal.Scale == 0 { p.rows[offset+i][col].SetInt64(v) @@ -354,10 +415,10 @@ func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { dotIndex := len(val) - int(decimal.Scale) p.rows[offset+i][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") } else if colTp == parquet.Types.FixedLenByteArray { - s := binaryToDecimalStr(p.colBuffers[col].fixedLenArrayBuffer[i], int(decimal.Scale)) + s := binaryToDecimalStr(p.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray)[i], int(decimal.Scale)) p.rows[offset+i][col].SetString(s, "utf8mb4_bin") } else { - s := binaryToDecimalStr(p.colBuffers[col].byteArrayBuffer[i], int(decimal.Scale)) + s := binaryToDecimalStr(p.dumpers[col].valueBuffer.([]parquet.ByteArray)[i], int(decimal.Scale)) p.rows[offset+i][col].SetString(s, "utf8mb4_bin") } } @@ -365,7 +426,7 @@ func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { } func (p *ParquetParser) setBoolData(readNum, col, offset int) { - buf := p.colBuffers[col].boolBuffer + buf := p.dumpers[col].valueBuffer.([]bool) for i := 0; i < readNum; i++ { if buf[i] { p.rows[offset+i][col].SetUint64(1) @@ -376,28 +437,28 @@ func (p *ParquetParser) setBoolData(readNum, col, offset int) { } func (p *ParquetParser) setFloat32Data(readNum, col, offset int) { - buf := p.colBuffers[col].float32Buffer + buf := p.dumpers[col].valueBuffer.([]float32) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetFloat32(buf[i]) } } func (p *ParquetParser) setFloat64Data(readNum, col, offset int) { - buf := p.colBuffers[col].float64Buffer + buf := p.dumpers[col].valueBuffer.([]float64) for i := 0; i < 
readNum; i++ { p.rows[offset+i][col].SetFloat64(buf[i]) } } func (p *ParquetParser) setFixedByteArrayData(readNum, col, offset int) { - buf := p.colBuffers[col].fixedLenArrayBuffer + buf := p.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") } } func (p *ParquetParser) setByteArrayData(readNum, col, offset int) { - buf := p.colBuffers[col].byteArrayBuffer + buf := p.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") } @@ -414,7 +475,7 @@ func (p *ParquetParser) setInt96Data(readNum, col, offset int) { // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, // where dt is a negative number but still legal in the context of Go. // But it will cause errors or potential data inconsistency when importing. - buf := p.colBuffers[col].int96Buffer + buf := p.dumpers[col].valueBuffer.([]parquet.Int96) for i := 0; i < readNum; i++ { p.rows[offset+i][col].SetString(buf[i].ToTime().Format(utcTimeLayout), "utf8mb4_bin") } @@ -422,20 +483,21 @@ func (p *ParquetParser) setInt96Data(readNum, col, offset int) { // Init initializes the Parquet parser and allocate necessary buffers func (p *ParquetParser) Init() error { + meta := p.readers[0].MetaData() + p.curRowGroup, p.totalRowGroup = -1, p.readers[0].NumRowGroups() - p.totalRows = int(p.readers[0].MetaData().NumRows) + p.totalRows = int(meta.NumRows) - numCols := p.readers[0].MetaData().Schema.NumColumns() - p.colReaders = make([]file.ColumnChunkReader, numCols) - p.colBuffers = make([]*readBuffer, numCols) - p.rows = make([][]types.Datum, batchReadRowSize) + numCols := meta.Schema.NumColumns() + p.rows = make([][]types.Datum, defaultBatchSize) for i := range p.rows { p.rows[i] = make([]types.Datum, numCols) } - for i := range p.colBuffers { - p.colBuffers[i] = &readBuffer{} - p.colBuffers[i].Init(batchReadRowSize) + + 
p.dumpers = make([]*Dumper, numCols) + for i := 0; i < numCols; i++ { + p.dumpers[i] = createDumper(meta.Schema.Column(i).PhysicalType()) } return nil @@ -453,13 +515,13 @@ func (p *ParquetParser) readRows(num int) (int, error) { // Move to next row group if p.curRowInGroup == p.totalRowsInGroup { p.curRowGroup++ - var err error - for c := 0; c < len(p.colReaders); c++ { + for c := 0; c < len(p.dumpers); c++ { rowGroupReader := p.readers[c].RowGroup(p.curRowGroup) - p.colReaders[c], err = rowGroupReader.Column(c) + colReader, err := rowGroupReader.Column(c) if err != nil { return 0, errors.Trace(err) } + p.dumpers[c].SetReader(colReader) } p.curRowInGroup, p.totalRowsInGroup = 0, int(p.readers[0].MetaData().RowGroups[p.curRowGroup].NumRows) } @@ -488,34 +550,10 @@ func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { ) // Read data into buffers first - req := int64(num) - for i, col := range p.colReaders { - buf := p.colBuffers[i] - physicalTp := col.Type() - switch physicalTp { - case parquet.Types.FixedLenByteArray: - total, _, err = col.(*file.FixedLenByteArrayColumnChunkReader).ReadBatch(req, buf.fixedLenArrayBuffer, nil, nil) - case parquet.Types.Float: - total, _, err = col.(*file.Float32ColumnChunkReader).ReadBatch(req, buf.float32Buffer, nil, nil) - case parquet.Types.Double: - total, _, err = col.(*file.Float64ColumnChunkReader).ReadBatch(req, buf.float64Buffer, nil, nil) - case parquet.Types.ByteArray: - total, _, err = col.(*file.ByteArrayColumnChunkReader).ReadBatch(req, buf.byteArrayBuffer, nil, nil) - case parquet.Types.Int32: - total, _, err = col.(*file.Int32ColumnChunkReader).ReadBatch(req, buf.int32Buffer, nil, nil) - case parquet.Types.Int64: - total, _, err = col.(*file.Int64ColumnChunkReader).ReadBatch(req, buf.int64Buffer, nil, nil) - case parquet.Types.Int96: - total, _, err = col.(*file.Int96ColumnChunkReader).ReadBatch(req, buf.int96Buffer, nil, nil) - case parquet.Types.Boolean: - total, _, err = 
col.(*file.BooleanColumnChunkReader).ReadBatch(req, buf.boolBuffer, nil, nil) - } - - if err != nil { - return 0, errors.Trace(err) - } - + for i, dumper := range p.dumpers { + dumper.readNextBatch() meta := p.colMetas[i] + physicalTp := dumper.Type() // If we can't get converted type, just use physical type if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { @@ -611,7 +649,7 @@ func (pp *ParquetParser) Close() error { // User should call ReadRow before calling this. func (p *ParquetParser) GetRow() ([]types.Datum, error) { if p.curIdx >= p.avail { - read, err := p.readRows(batchReadRowSize) + read, err := p.readRows(defaultBatchSize) if err != nil { return nil, errors.Trace(err) } @@ -788,15 +826,14 @@ func NewParquetParser( subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) subreaders = append(subreaders, reader) for i := 1; i < fileSchema.NumColumns(); i++ { - // TODO(joechenrh): fix memory usage later - // newWrapper, err := wrapper.Open("") - // if err != nil { - // return nil, errors.Trace(err) - // } - // reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) - // if err != nil { - // return nil, errors.Trace(err) - // } + newWrapper, err := wrapper.Open("") + if err != nil { + return nil, errors.Trace(err) + } + reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + if err != nil { + return nil, errors.Trace(err) + } subreaders = append(subreaders, reader) } From 837c7d1b01bb9291cc9707771db1e203943339d3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 16 Dec 2024 16:34:11 +0800 Subject: [PATCH 12/93] Update parser and memory usage estimation --- go.mod | 20 +++---- go.sum | 40 ++++++------- lightning/pkg/importer/table_import.go | 15 +---- pkg/lightning/mydump/parquet_parser.go | 82 +++++++++++++++++++------- 4 files changed, 95 insertions(+), 62 
deletions(-) diff --git a/go.mod b/go.mod index 6188e4026bd13..dd12d4c00ee7f 100644 --- a/go.mod +++ b/go.mod @@ -106,7 +106,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/spkg/bom v1.0.0 github.com/stathat/consistent v1.0.0 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 github.com/tdakkota/asciicheck v0.2.0 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 github.com/tidwall/btree v1.7.0 @@ -132,14 +132,14 @@ require ( go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 - golang.org/x/net v0.31.0 + golang.org/x/net v0.32.0 golang.org/x/oauth2 v0.23.0 - golang.org/x/sync v0.9.0 - golang.org/x/sys v0.27.0 - golang.org/x/term v0.26.0 - golang.org/x/text v0.20.0 + golang.org/x/sync v0.10.0 + golang.org/x/sys v0.28.0 + golang.org/x/term v0.27.0 + golang.org/x/text v0.21.0 golang.org/x/time v0.7.0 - golang.org/x/tools v0.27.0 + golang.org/x/tools v0.28.0 google.golang.org/api v0.169.0 google.golang.org/grpc v1.64.1 gopkg.in/yaml.v2 v2.4.0 @@ -151,7 +151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 +require github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d require ( filippo.io/edwards25519 v1.1.0 // indirect @@ -213,7 +213,7 @@ require ( github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect - github.com/goccy/go-json v0.10.3 // indirect + github.com/goccy/go-json v0.10.4 // indirect github.com/golang-jwt/jwt/v4 v4.5.1 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect github.com/golang/glog v1.2.2 // indirect @@ -302,7 +302,7 @@ require ( go.opentelemetry.io/otel/sdk v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.1.0 // indirect - golang.org/x/crypto v0.29.0 // indirect + 
golang.org/x/crypto v0.30.0 // indirect golang.org/x/exp/typeparams v0.0.0-20240909161429-701f63a606c0 // indirect golang.org/x/mod v0.22.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect diff --git a/go.sum b/go.sum index 4c7f85d667872..553df0dc2de01 100644 --- a/go.sum +++ b/go.sum @@ -305,8 +305,8 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= -github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM= +github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/goccy/go-reflect v1.2.0 h1:O0T8rZCuNmGXewnATuKYnkL0xm6o8UNOJZd/gOkb9ms= github.com/goccy/go-reflect v1.2.0/go.mod h1:n0oYZn8VcV2CkWTxi8B9QjkCoq6GTtCEdfmR66YhFtE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -510,8 +510,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500 h1:f4ZzzvqUDuOBE+39uqS7RtlbdJ8nfW1Jm8eePeoVO8Y= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241129015824-a71d1f023500/go.mod h1:qtkkdMnKrhq4O5anTsgfD5J+XXOwgMXVmmX4qQPvEgQ= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d 
h1:1uBoAYSaHFH+dOAGmcDlLEuV5zoCbm48q128LRD/fH8= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= @@ -816,8 +816,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tdakkota/asciicheck v0.2.0 h1:o8jvnUANo0qXtnslk2d3nMKTFNlOnJjRrNcj0j9qkHM= github.com/tdakkota/asciicheck v0.2.0/go.mod h1:Qb7Y9EgjCLJGup51gDHFzbI08/gbGhL/UVhYIPWG2rg= github.com/tenntenn/modver v1.0.1 h1:2klLppGhDgzJrScMpkj9Ujy3rXPUspSjAcev9tSEBgA= @@ -972,8 +972,8 @@ golang.org/x/crypto v0.0.0-20220518034528-6f7dac969898/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= -golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= -golang.org/x/crypto v0.29.0/go.mod 
h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/crypto v0.30.0 h1:RwoQn3GkWiMkzlX562cLB7OxWvjH1L8xutO2WoJcRoY= +golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1082,8 +1082,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= -golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= -golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1106,8 +1106,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod 
h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180816055513-1c9583448a9c/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1172,8 +1172,8 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1183,8 +1183,8 @@ golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= -golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= -golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= +golang.org/x/term 
v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1198,8 +1198,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1270,8 +1270,8 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= -golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= -golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= +golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= 
+golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 20c5a0b4dfd1c..35bb57feb5ccb 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -29,7 +29,6 @@ import ( dmysql "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/lightning/pkg/web" "github.com/pingcap/tidb/pkg/errno" @@ -784,10 +783,11 @@ ChunkLoop: } if chunk.FileMeta.Type == mydump.SourceTypeParquet { - // TODO: use the compressed size of the chunk to conduct memory control - if _, err = getChunkCompressedSizeForParquet(ctx, chunk, rc.store); err != nil { + pp := cr.parser.(*mydump.ParquetParser) + if _, err := pp.ReadRows(64); err != nil { return nil, errors.Trace(err) } + _ = pp.GetMemoryUage() } restoreWorker := rc.regionWorkers.Apply() @@ -1196,15 +1196,6 @@ func (tr *TableImporter) postProcess( return true, nil } -// TODO(joechenrh): remove this function -func getChunkCompressedSizeForParquet( - ctx context.Context, - chunk *checkpoints.ChunkCheckpoint, - store storage.ExternalStorage, -) (int64, error) { - return 100, nil -} - func updateStatsMeta(ctx context.Context, db *sql.DB, tableID int64, count int) { s := common.SQLWithRetry{ DB: db, diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 18ed8857b04f6..5c1e137d45059 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ 
b/pkg/lightning/mydump/parquet_parser.go @@ -21,6 +21,7 @@ import ( "io" "math/big" "strings" + "sync/atomic" "time" "github.com/pingcap/errors" @@ -28,6 +29,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/types" + "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/joechenrh/arrow-go/v18/parquet" "github.com/joechenrh/arrow-go/v18/parquet/file" "github.com/joechenrh/arrow-go/v18/parquet/schema" @@ -45,6 +47,29 @@ const ( timeLayout = "2006-01-02 15:04:05.999999" ) +type allocatorWithStats struct { + baseAllocator memory.Allocator + allocated atomic.Int64 +} + +func (a *allocatorWithStats) Allocate(size int) []byte { + b := a.baseAllocator.Allocate(size) + a.allocated.Add(int64(cap(b))) + return b +} + +func (a *allocatorWithStats) Reallocate(size int, b []byte) []byte { + return a.baseAllocator.Reallocate(size, b) +} + +func (a *allocatorWithStats) Free(b []byte) { + a.baseAllocator.Free(b) +} + +func (a *allocatorWithStats) Allocated() int64 { + return a.allocated.Load() +} + type Dumper struct { reader file.ColumnChunkReader batchSize int64 @@ -100,36 +125,37 @@ func (dump *Dumper) SetReader(colReader file.ColumnChunkReader) { dump.levelOffset = 0 } -func (dump *Dumper) readNextBatch() { +func (dump *Dumper) readNextBatch(req int64) int { switch reader := dump.reader.(type) { case *file.BooleanColumnChunkReader: values := dump.valueBuffer.([]bool) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int32ColumnChunkReader: values := dump.valueBuffer.([]int32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int64ColumnChunkReader: values := 
dump.valueBuffer.([]int64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Float32ColumnChunkReader: values := dump.valueBuffer.([]float32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Float64ColumnChunkReader: values := dump.valueBuffer.([]float64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int96ColumnChunkReader: values := dump.valueBuffer.([]parquet.Int96) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.ByteArrayColumnChunkReader: values := dump.valueBuffer.([]parquet.ByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.FixedLenByteArrayColumnChunkReader: values := dump.valueBuffer.([]parquet.FixedLenByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) } dump.valueOffset = 0 dump.levelOffset = 0 + return int(dump.levelsBuffered) } // convertedType is older representation of the logical type in parquet @@ -295,7 
+321,8 @@ type ParquetParser struct { readers []*file.Reader colMetas []convertedType columnNames []string - readType int + + alloc *allocatorWithStats // colBuffers is used to store raw data read from parquet columns. // rows stores the actual data after parsing. @@ -318,6 +345,17 @@ type ParquetParser struct { logger log.Logger } +// GetMemoryUage estimate the memory usage for this file. +func (p *ParquetParser) GetMemoryUage() int64 { + // The reason for multiplying by six is as follows: + // 1. The file reader requires a buffer to accommodate at least one data page. + // 2. The page reader needs two buffers: one for storing compressed data and another for uncompressed data. + // 3. Only the uncompressed data will be recorded in the parser. + // 4. When moving to the next row group, we may allocate three additional buffers. + // Therefore, we multiply the memory usage by six to estimate the memory usage. + return p.alloc.Allocated() * 6 +} + func (p *ParquetParser) setStringData(readNum, col, offset int) { buf := p.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { @@ -503,8 +541,8 @@ func (p *ParquetParser) Init() error { return nil } -// readRows read several rows internally and store them in the row buffer. -func (p *ParquetParser) readRows(num int) (int, error) { +// ReadRows read several rows internally and store them in the row buffer. +func (p *ParquetParser) ReadRows(num int) (int, error) { readNum := min(num, p.totalRows-p.curRows) if readNum == 0 { return 0, nil @@ -537,21 +575,22 @@ func (p *ParquetParser) readRows(num int) (int, error) { } p.curRows += readNum + p.curIdx, p.avail = 0, readNum return readNum, nil } // readInGroup read severals rows in current row group. // storeOffset represents the starting position for storing the read rows. -// It's a part of the readRows. +// It's a part of the ReadRows. 
func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { var ( err error - total int64 + total int ) // Read data into buffers first for i, dumper := range p.dumpers { - dumper.readNextBatch() + total = dumper.readNextBatch(int64(num)) meta := p.colMetas[i] physicalTp := dumper.Type() @@ -604,7 +643,7 @@ func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { } } - return int(total), err + return total, err } // Pos returns the currently row number of the parquet file @@ -621,8 +660,10 @@ func (p *ParquetParser) SetPos(pos int64, rowID int64) error { panic("don't support seek back yet") } + // Read and discard these rows read := int(pos) - p.curRows - _, err := p.readRows(read) + _, err := p.ReadRows(read) + p.curIdx, p.avail = 0, 0 return errors.Trace(err) } @@ -649,14 +690,13 @@ func (pp *ParquetParser) Close() error { // User should call ReadRow before calling this. func (p *ParquetParser) GetRow() ([]types.Datum, error) { if p.curIdx >= p.avail { - read, err := p.readRows(defaultBatchSize) + read, err := p.ReadRows(defaultBatchSize) if err != nil { return nil, errors.Trace(err) } if read == 0 { return nil, nil } - p.curIdx, p.avail = 0, read } row := p.rows[p.curIdx] @@ -798,7 +838,8 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } - prop := parquet.NewReaderProperties(nil) + alloc := &allocatorWithStats{baseAllocator: memory.DefaultAllocator} + prop := parquet.NewReaderProperties(alloc) prop.BufferedStreamEnabled = true reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) @@ -841,6 +882,7 @@ func NewParquetParser( readers: subreaders, colMetas: columnMetas, columnNames: columnNames, + alloc: alloc, logger: log.FromContext(ctx), } parser.Init() From dff3f671ed284626133dfd8a97096a3f58e603e7 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 16 Dec 2024 16:52:28 +0800 Subject: [PATCH 13/93] Add memory limiter --- lightning/pkg/importer/chunk_process.go | 12 ++++++++--- 
lightning/pkg/importer/table_import.go | 28 ++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index 2b7d584d5237a..40597bac3fab4 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/keyspace" "github.com/pingcap/tidb/pkg/lightning/backend" @@ -50,9 +51,11 @@ import ( // for local backend it encodes and writes KV to local disk // for tidb backend it transforms data into sql and executes them. type chunkProcessor struct { - parser mydump.Parser - index int - chunk *checkpoints.ChunkCheckpoint + parser mydump.Parser + index int + chunk *checkpoints.ChunkCheckpoint + memLimiter *membuf.Limiter + memoryUsage int } func newChunkProcessor( @@ -776,5 +779,8 @@ func (*chunkProcessor) maybeSaveCheckpoint( } func (cr *chunkProcessor) close() { + if cr.memLimiter != nil { + cr.memLimiter.Release(cr.memoryUsage) + } _ = cr.parser.Close() } diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 35bb57feb5ccb..52dcc3d38257f 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -20,6 +20,7 @@ import ( "database/sql" "encoding/hex" "fmt" + "math" "path/filepath" "slices" "strings" @@ -29,6 +30,7 @@ import ( dmysql "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/lightning/pkg/web" "github.com/pingcap/tidb/pkg/errno" @@ -52,6 +54,7 @@ import ( "github.com/pingcap/tidb/pkg/table/tables" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/extsort" + 
"github.com/pingcap/tidb/pkg/util/memory" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/multierr" "go.uber.org/zap" @@ -59,6 +62,18 @@ import ( "google.golang.org/grpc/status" ) +var memLimiter *membuf.Limiter + +func init() { + memTotal, err := memory.MemTotal() + if err != nil { + // Set limit to int max, which means no limiter + memTotal = math.MaxInt32 + } + // TODO(joechenrh): set a more proper waterline + memLimiter = membuf.NewLimiter(int(memTotal / 5 * 4)) +} + // TableImporter is a helper struct to import a table. type TableImporter struct { // The unique table name in the form "`db`.`tbl`". @@ -709,6 +724,8 @@ func (tr *TableImporter) preprocessEngine( metrics, _ := metric.FromContext(ctx) + maxMemoryUsage := 0 + // Restore table data ChunkLoop: for chunkIndex, chunk := range cp.Chunks { @@ -782,12 +799,21 @@ ChunkLoop: break } + // Limit the concurrency of parquet reader using estimated memory usage. if chunk.FileMeta.Type == mydump.SourceTypeParquet { + // To avoid OOM during file opening, we update the waterline before reading. 
+ memLimiter.Acquire(maxMemoryUsage) pp := cr.parser.(*mydump.ParquetParser) if _, err := pp.ReadRows(64); err != nil { return nil, errors.Trace(err) } - _ = pp.GetMemoryUage() + memoryUsage := int(pp.GetMemoryUage()) + memLimiter.Release(maxMemoryUsage) + + memLimiter.Acquire(memoryUsage) + cr.memLimiter = memLimiter + cr.memoryUsage = memoryUsage + maxMemoryUsage = max(maxMemoryUsage, memoryUsage) } restoreWorker := rc.regionWorkers.Apply() From b3936706e27486ed78d70d9ec2da5edc14ec24ab Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 16 Dec 2024 18:09:31 +0800 Subject: [PATCH 14/93] Update go mod --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 73410f011c135..6b0b938a835cb 100644 --- a/go.mod +++ b/go.mod @@ -308,7 +308,7 @@ require ( golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect google.golang.org/protobuf v1.35.2 gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect diff --git a/go.sum b/go.sum index 0aed9bf655362..8a1c13c5609b5 100644 --- a/go.sum +++ b/go.sum @@ -1351,8 +1351,8 @@ google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUEr4jDysRDLrm4PHePlge4v4TGAlxY= google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= -google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 
h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= -google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= From 25433c14a488b94439b925e0a8b707f71a53df0f Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 16 Dec 2024 18:46:53 +0800 Subject: [PATCH 15/93] Fix bazel --- DEPS.bzl | 412 ++++++++++++++++++++++------- lightning/pkg/importer/BUILD.bazel | 2 + pkg/lightning/mydump/BUILD.bazel | 7 +- 3 files changed, 316 insertions(+), 105 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index e826ae2f62c06..d206e208ac85b 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -186,6 +186,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/alecthomas/kingpin/v2/com_github_alecthomas_kingpin_v2-v2.4.0.zip", ], ) + go_repository( + name = "com_github_alecthomas_participle_v2", + build_file_proto_mode = "disable_global", + importpath = "github.com/alecthomas/participle/v2", + sha256 = "257ab6b73198005370511b9677004134374f41464eb3731298c38c1b768b1218", + strip_prefix = "github.com/alecthomas/participle/v2@v2.1.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", + "http://ats.apps.svc/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", + 
"https://cache.hawkingrei.com/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", + ], + ) go_repository( name = "com_github_alecthomas_units", build_file_proto_mode = "disable_global", @@ -268,13 +281,13 @@ def go_deps(): name = "com_github_andybalholm_brotli", build_file_proto_mode = "disable_global", importpath = "github.com/andybalholm/brotli", - sha256 = "f5ae9b2f3260a22ff3f3445fff081d3ef12ee1aa3c0b87eadc59b5a8fb2cdef0", - strip_prefix = "github.com/andybalholm/brotli@v1.0.5", + sha256 = "d183c2a1277d2784861bfa89e3903ecba31ef08237c3e92fbdfd4ab0b551154e", + strip_prefix = "github.com/andybalholm/brotli@v1.1.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.0.5.zip", - "http://ats.apps.svc/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.0.5.zip", - "https://cache.hawkingrei.com/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.0.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.0.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.1.1.zip", + "http://ats.apps.svc/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.1.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.1.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/andybalholm/brotli/com_github_andybalholm_brotli-v1.1.1.zip", ], ) go_repository( @@ -359,13 +372,13 @@ def go_deps(): name = "com_github_apache_thrift", build_file_proto_mode = "disable_global", importpath = "github.com/apache/thrift", - sha256 = "50d5c610df30fa2a6039394d5142382b7d9938870dfb12ef46bddfa3da250893", - strip_prefix = 
"github.com/apache/thrift@v0.16.0", + sha256 = "c96231bd10eb2488974c977c1f1bafdce90ec56710d193f54f5771a379835a99", + strip_prefix = "github.com/apache/thrift@v0.21.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.16.0.zip", - "http://ats.apps.svc/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.16.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.16.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.16.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.21.0.zip", + "http://ats.apps.svc/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.21.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.21.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/apache/thrift/com_github_apache_thrift-v0.21.0.zip", ], ) go_repository( @@ -1087,13 +1100,13 @@ def go_deps(): name = "com_github_cncf_xds_go", build_file_proto_mode = "disable_global", importpath = "github.com/cncf/xds/go", - sha256 = "ab0d2fd980b15a582708a728cf8080ebb88778e59f3003b67c6aafaa9ad0f447", - strip_prefix = "github.com/cncf/xds/go@v0.0.0-20231128003011-0fa0005c9caa", + sha256 = "7395d4a588bcabf822f2347b647b66853a14a98088dd1ea0582cfa7a241c4234", + strip_prefix = "github.com/cncf/xds/go@v0.0.0-20240318125728-8a4994d93e50", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", - "http://ats.apps.svc/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", - "https://cache.hawkingrei.com/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", + "http://ats.apps.svc/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", + "https://cache.hawkingrei.com/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", ], ) go_repository( @@ -1369,6 +1382,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/creack/pty/com_github_creack_pty-v1.1.11.zip", ], ) + go_repository( + name = "com_github_creasty_defaults", + build_file_proto_mode = "disable_global", + importpath = "github.com/creasty/defaults", + sha256 = "d9984bcd4b7326a6066f58bc94b46fe8657e50e1ba0a3ef6eb592b0ff96e6712", + strip_prefix = "github.com/creasty/defaults@v1.8.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "http://ats.apps.svc/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + ], + ) go_repository( name = "com_github_crocmagnon_fatcontext", build_file_proto_mode = "disable_global", @@ -2699,13 +2725,13 @@ def go_deps(): name = "com_github_goccy_go_json", build_file_proto_mode = "disable_global", importpath = "github.com/goccy/go-json", - sha256 = "ed9043ee01cc46557c74bcecc625db37ffe3a5c7af219f390a287f44a40c2520", - strip_prefix = "github.com/goccy/go-json@v0.10.2", + sha256 = 
"d89fa5215f5c2e0077b67dbc6e043db1b6427e93bdf5faacb7c536afca95e454", + strip_prefix = "github.com/goccy/go-json@v0.10.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.2.zip", - "http://ats.apps.svc/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.2.zip", - "https://cache.hawkingrei.com/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.4.zip", + "http://ats.apps.svc/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.4.zip", + "https://cache.hawkingrei.com/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.4.zip", ], ) go_repository( @@ -2721,6 +2747,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-reflect/com_github_goccy_go_reflect-v1.2.0.zip", ], ) + go_repository( + name = "com_github_goccy_go_yaml", + build_file_proto_mode = "disable_global", + importpath = "github.com/goccy/go-yaml", + sha256 = "13a7174686c1e9a053a29c848016fb2ed7a39b6befea6db085e8b5d51990d0ee", + strip_prefix = "github.com/goccy/go-yaml@v1.11.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", + "http://ats.apps.svc/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", + ], + ) go_repository( name = "com_github_godbus_dbus_v5", build_file_proto_mode = "disable_global", @@ -2816,13 +2855,13 @@ def 
go_deps(): name = "com_github_golang_glog", build_file_proto_mode = "disable_global", importpath = "github.com/golang/glog", - sha256 = "07688d418628ff30ffd40fde44956d1fb6bae4436003d7fcca40c85236b9484a", - strip_prefix = "github.com/golang/glog@v1.2.0", + sha256 = "6b0fbe67a83fd3d468fcfad080decbedea4edd6586b5174a8c638133c81c4b99", + strip_prefix = "github.com/golang/glog@v1.2.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "http://ats.apps.svc/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.2.zip", + "http://ats.apps.svc/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.2.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.2.zip", ], ) go_repository( @@ -3076,13 +3115,13 @@ def go_deps(): name = "com_github_google_flatbuffers", build_file_proto_mode = "disable_global", importpath = "github.com/google/flatbuffers", - sha256 = "0c0a4aab1c6029141d655bc7fdc07e22dd06f3f64ebbf7a2250b870ef7aac7ee", - strip_prefix = "github.com/google/flatbuffers@v2.0.8+incompatible", + sha256 = "d067355c553528de4ca6e75a64013ee7336a48c2e98d9eb2d4c7803cc6051dac", + strip_prefix = "github.com/google/flatbuffers@v24.3.25+incompatible", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v2.0.8+incompatible.zip", - "http://ats.apps.svc/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v2.0.8+incompatible.zip", - 
"https://cache.hawkingrei.com/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v2.0.8+incompatible.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v2.0.8+incompatible.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v24.3.25+incompatible.zip", + "http://ats.apps.svc/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v24.3.25+incompatible.zip", + "https://cache.hawkingrei.com/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v24.3.25+incompatible.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/google/flatbuffers/com_github_google_flatbuffers-v24.3.25+incompatible.zip", ], ) go_repository( @@ -3532,6 +3571,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/guptarohit/asciigraph/com_github_guptarohit_asciigraph-v0.5.5.zip", ], ) + go_repository( + name = "com_github_hamba_avro_v2", + build_file_proto_mode = "disable_global", + importpath = "github.com/hamba/avro/v2", + sha256 = "43e5bb66c63e312431112307c7e40dd4ae772819169a48dfacb725fd737d46d2", + strip_prefix = "github.com/hamba/avro/v2@v2.27.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/hamba/avro/v2/com_github_hamba_avro_v2-v2.27.0.zip", + "http://ats.apps.svc/gomod/github.com/hamba/avro/v2/com_github_hamba_avro_v2-v2.27.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/hamba/avro/v2/com_github_hamba_avro_v2-v2.27.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/hamba/avro/v2/com_github_hamba_avro_v2-v2.27.0.zip", + ], + ) go_repository( name = "com_github_hashicorp_consul_api", build_file_proto_mode = "disable_global", @@ -3688,6 +3740,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/hashicorp/golang-lru/com_github_hashicorp_golang_lru-v0.6.0.zip", ], ) + go_repository( + name = 
"com_github_hashicorp_golang_lru_v2", + build_file_proto_mode = "disable_global", + importpath = "github.com/hashicorp/golang-lru/v2", + sha256 = "2eb92ff13970bccd460efae14255bfc03bb51474da0137e477a60f95561acc30", + strip_prefix = "github.com/hashicorp/golang-lru/v2@v2.0.7", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/hashicorp/golang-lru/v2/com_github_hashicorp_golang_lru_v2-v2.0.7.zip", + "http://ats.apps.svc/gomod/github.com/hashicorp/golang-lru/v2/com_github_hashicorp_golang_lru_v2-v2.0.7.zip", + "https://cache.hawkingrei.com/gomod/github.com/hashicorp/golang-lru/v2/com_github_hashicorp_golang_lru_v2-v2.0.7.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/hashicorp/golang-lru/v2/com_github_hashicorp_golang_lru_v2-v2.0.7.zip", + ], + ) go_repository( name = "com_github_hashicorp_hcl", build_file_proto_mode = "disable_global", @@ -4052,6 +4117,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/jmespath/go-jmespath/internal/testify/com_github_jmespath_go_jmespath_internal_testify-v1.5.1.zip", ], ) + go_repository( + name = "com_github_joechenrh_arrow_go_v18", + build_file_proto_mode = "disable_global", + importpath = "github.com/joechenrh/arrow-go/v18", + sha256 = "6569ded6b245e1f44cbd3c2e75187040372dfba9111b7f1c9d4e301b86d3316d", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20241216023057-f9949aab8c2d", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", + ], + ) go_repository( name = "com_github_johannesboyne_gofakes3", build_file_proto_mode = "disable_global", @@ -4385,13 +4463,13 @@ def go_deps(): name = "com_github_klauspost_compress", build_file_proto_mode = "disable_global", importpath = "github.com/klauspost/compress", - sha256 = "a009d53eecbdb9d6b789e9a0662fa41c87a85ab280291b2b5a5d9664bb1c5e8f", - strip_prefix = "github.com/klauspost/compress@v1.17.9", + sha256 = "88dea800cc6a11ccb9dd2f0dd487f30e8701870abdfc11245e41dcfc9f3d428e", + strip_prefix = "github.com/klauspost/compress@v1.17.11", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.9.zip", - "http://ats.apps.svc/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.9.zip", - "https://cache.hawkingrei.com/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.9.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.9.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.11.zip", + "http://ats.apps.svc/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.11.zip", + "https://cache.hawkingrei.com/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.11.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/klauspost/compress/com_github_klauspost_compress-v1.17.11.zip", ], ) go_repository( @@ -4411,13 +4489,13 @@ def go_deps(): name = "com_github_klauspost_cpuid_v2", build_file_proto_mode = "disable_global", importpath = "github.com/klauspost/cpuid/v2", - sha256 = "52c716413296dce2b1698c6cdbc4c53927ce4aee2a60980daf9672e6b6a3b4cb", - strip_prefix = "github.com/klauspost/cpuid/v2@v2.0.9", + sha256 = 
"59cd12e9094495dda4ce5f344bcbaa4238dfa7bb0eba66f514ec0bacd5ce99e5", + strip_prefix = "github.com/klauspost/cpuid/v2@v2.2.9", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.0.9.zip", - "http://ats.apps.svc/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.0.9.zip", - "https://cache.hawkingrei.com/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.0.9.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.0.9.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.2.9.zip", + "http://ats.apps.svc/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.2.9.zip", + "https://cache.hawkingrei.com/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.2.9.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/klauspost/cpuid/v2/com_github_klauspost_cpuid_v2-v2.2.9.zip", ], ) go_repository( @@ -5265,6 +5343,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/nbutton23/zxcvbn-go/com_github_nbutton23_zxcvbn_go-v0.0.0-20210217022336-fa2cb2858354.zip", ], ) + go_repository( + name = "com_github_ncruces_go_strftime", + build_file_proto_mode = "disable_global", + importpath = "github.com/ncruces/go-strftime", + sha256 = "3c46ee9c9db8fde8ce93c768a8701fa01f630bab0cfff338481cde768fe561ac", + strip_prefix = "github.com/ncruces/go-strftime@v0.1.9", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/ncruces/go-strftime/com_github_ncruces_go_strftime-v0.1.9.zip", + "http://ats.apps.svc/gomod/github.com/ncruces/go-strftime/com_github_ncruces_go_strftime-v0.1.9.zip", + "https://cache.hawkingrei.com/gomod/github.com/ncruces/go-strftime/com_github_ncruces_go_strftime-v0.1.9.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/ncruces/go-strftime/com_github_ncruces_go_strftime-v0.1.9.zip", + ], + ) go_repository( name = "com_github_ncw_directio", build_file_proto_mode = "disable_global", @@ -5685,13 +5776,13 @@ def go_deps(): name = "com_github_pierrec_lz4_v4", build_file_proto_mode = "disable_global", importpath = "github.com/pierrec/lz4/v4", - sha256 = "5dadfc447d593c4a8a75520b9f048142d725e4d966d48883ece2380c16081900", - strip_prefix = "github.com/pierrec/lz4/v4@v4.1.15", + sha256 = "bd2e8ef13800ca42205b0d4085a927a6d012b82cfa831769be4830036e953bec", + strip_prefix = "github.com/pierrec/lz4/v4@v4.1.21", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.15.zip", - "http://ats.apps.svc/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.15.zip", - "https://cache.hawkingrei.com/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.15.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.15.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.21.zip", + "http://ats.apps.svc/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.21.zip", + "https://cache.hawkingrei.com/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.21.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pierrec/lz4/v4/com_github_pierrec_lz4_v4-v4.1.21.zip", ], ) go_repository( @@ -6760,6 +6851,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/stbenjam/no-sprintf-host-port/com_github_stbenjam_no_sprintf_host_port-v0.1.1.zip", ], ) + go_repository( + name = "com_github_stoewer_go_strcase", + build_file_proto_mode = "disable_global", + importpath = "github.com/stoewer/go-strcase", + sha256 = "42448455706d43dc6b51dc007ceb9726f2b928cb3137c82dc4cfce4e4c565d83", + strip_prefix = 
"github.com/stoewer/go-strcase@v1.3.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/stoewer/go-strcase/com_github_stoewer_go_strcase-v1.3.0.zip", + "http://ats.apps.svc/gomod/github.com/stoewer/go-strcase/com_github_stoewer_go_strcase-v1.3.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/stoewer/go-strcase/com_github_stoewer_go_strcase-v1.3.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/stoewer/go-strcase/com_github_stoewer_go_strcase-v1.3.0.zip", + ], + ) go_repository( name = "com_github_stretchr_objx", build_file_proto_mode = "disable_global", @@ -6799,6 +6903,32 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/subosito/gotenv/com_github_subosito_gotenv-v1.4.1.zip", ], ) + go_repository( + name = "com_github_substrait_io_substrait", + build_file_proto_mode = "disable_global", + importpath = "github.com/substrait-io/substrait", + sha256 = "c0f97dde3d195992c937764f322406500d12b7e197ca3eae581b4f9369ca22f5", + strip_prefix = "github.com/substrait-io/substrait@v0.57.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", + "http://ats.apps.svc/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", + ], + ) + go_repository( + name = "com_github_substrait_io_substrait_go", + build_file_proto_mode = "disable_global", + importpath = "github.com/substrait-io/substrait-go", + sha256 = "fd4e19b47316b161bca0f9c10da86a16db760954b40c822bdf601b6a33d8a2e0", + strip_prefix = "github.com/substrait-io/substrait-go@v1.2.0", + urls = [ + 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", + "http://ats.apps.svc/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", + ], + ) go_repository( name = "com_github_tdakkota_asciicheck", build_file_proto_mode = "disable_global", @@ -6916,6 +7046,58 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/btree/com_github_tidwall_btree-v1.7.0.zip", ], ) + go_repository( + name = "com_github_tidwall_gjson", + build_file_proto_mode = "disable_global", + importpath = "github.com/tidwall/gjson", + sha256 = "d180a76b70730c6daceb712de05f006861e6c173ae6cd14a1a01ae0bd28b403c", + strip_prefix = "github.com/tidwall/gjson@v1.14.2", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/tidwall/gjson/com_github_tidwall_gjson-v1.14.2.zip", + "http://ats.apps.svc/gomod/github.com/tidwall/gjson/com_github_tidwall_gjson-v1.14.2.zip", + "https://cache.hawkingrei.com/gomod/github.com/tidwall/gjson/com_github_tidwall_gjson-v1.14.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/gjson/com_github_tidwall_gjson-v1.14.2.zip", + ], + ) + go_repository( + name = "com_github_tidwall_match", + build_file_proto_mode = "disable_global", + importpath = "github.com/tidwall/match", + sha256 = "2ba41f7f27330d49e0e432cbf96bf90720a33e4a97be58fe53f63a7e66f04d37", + strip_prefix = "github.com/tidwall/match@v1.1.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/tidwall/match/com_github_tidwall_match-v1.1.1.zip", + "http://ats.apps.svc/gomod/github.com/tidwall/match/com_github_tidwall_match-v1.1.1.zip", + 
"https://cache.hawkingrei.com/gomod/github.com/tidwall/match/com_github_tidwall_match-v1.1.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/match/com_github_tidwall_match-v1.1.1.zip", + ], + ) + go_repository( + name = "com_github_tidwall_pretty", + build_file_proto_mode = "disable_global", + importpath = "github.com/tidwall/pretty", + sha256 = "8a7050340a6d97d00e3ae41b4be167b9ef35d4df5596ecd59106e1aaf8774b48", + strip_prefix = "github.com/tidwall/pretty@v1.2.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/tidwall/pretty/com_github_tidwall_pretty-v1.2.0.zip", + "http://ats.apps.svc/gomod/github.com/tidwall/pretty/com_github_tidwall_pretty-v1.2.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/tidwall/pretty/com_github_tidwall_pretty-v1.2.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/pretty/com_github_tidwall_pretty-v1.2.0.zip", + ], + ) + go_repository( + name = "com_github_tidwall_sjson", + build_file_proto_mode = "disable_global", + importpath = "github.com/tidwall/sjson", + sha256 = "fc40fda87c7de55362f6386e069d674772f25c9d76411ee823acb9ac78b20565", + strip_prefix = "github.com/tidwall/sjson@v1.2.5", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/tidwall/sjson/com_github_tidwall_sjson-v1.2.5.zip", + "http://ats.apps.svc/gomod/github.com/tidwall/sjson/com_github_tidwall_sjson-v1.2.5.zip", + "https://cache.hawkingrei.com/gomod/github.com/tidwall/sjson/com_github_tidwall_sjson-v1.2.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/sjson/com_github_tidwall_sjson-v1.2.5.zip", + ], + ) go_repository( name = "com_github_tikv_client_go_v2", build_file_proto_mode = "disable_global", @@ -7358,6 +7540,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/xordataexchange/crypt/com_github_xordataexchange_crypt-v0.0.3-0.20170626215501-b2862e3d0a77.zip", ], ) + go_repository( + name = 
"com_github_xyproto_randomstring", + build_file_proto_mode = "disable_global", + importpath = "github.com/xyproto/randomstring", + sha256 = "58ea0c70496fc698c4597625395b3f35321df09be013ee880a6ce10e94969261", + strip_prefix = "github.com/xyproto/randomstring@v1.0.5", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/xyproto/randomstring/com_github_xyproto_randomstring-v1.0.5.zip", + "http://ats.apps.svc/gomod/github.com/xyproto/randomstring/com_github_xyproto_randomstring-v1.0.5.zip", + "https://cache.hawkingrei.com/gomod/github.com/xyproto/randomstring/com_github_xyproto_randomstring-v1.0.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/xyproto/randomstring/com_github_xyproto_randomstring-v1.0.5.zip", + ], + ) go_repository( name = "com_github_yagipy_maintidx", build_file_proto_mode = "disable_global", @@ -7817,26 +8012,26 @@ def go_deps(): name = "com_google_cloud_go_compute", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/compute", - sha256 = "0cf3d4325e378c92ff90cef3d1b7752682a77f0eaa0b11c092cc3ea32e5ed638", - strip_prefix = "cloud.google.com/go/compute@v1.24.0", + sha256 = "5173a017a15f7874e68752a8116556fe0d7e5e11344dd4265c454467bb651cb8", + strip_prefix = "cloud.google.com/go/compute@v1.25.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", ], ) go_repository( name = "com_google_cloud_go_compute_metadata", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/compute/metadata", - sha256 = "c0ab79c30870c1aa9912fb0fdcb043e0044782825988e40f59401d227976b677", - strip_prefix = "cloud.google.com/go/compute/metadata@v0.3.0", + sha256 = "5325feb8adc47daf4e4e74e21922c3e12b8f6201571b2aa3f7b413771190c2a3", + strip_prefix = "cloud.google.com/go/compute/metadata@v0.5.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", ], ) go_repository( @@ -9949,26 +10144,26 @@ def go_deps(): name = "org_golang_google_genproto_googleapis_rpc", build_file_proto_mode = "disable_global", importpath = "google.golang.org/genproto/googleapis/rpc", - sha256 = 
"53ce5ee04a9fd853c81fdd00cd06b426ec3212e57ae6d591153ad823243bae8a", - strip_prefix = "google.golang.org/genproto/googleapis/rpc@v0.0.0-20240515191416-fc5f0ca64291", + sha256 = "798f4e9522193634403993f16fa73824d58282efbda9f48805071dab5008154f", + strip_prefix = "google.golang.org/genproto/googleapis/rpc@v0.0.0-20241104194629-dd2ea8efbc28", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20240515191416-fc5f0ca64291.zip", - "http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20240515191416-fc5f0ca64291.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20240515191416-fc5f0ca64291.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20240515191416-fc5f0ca64291.zip", + "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", + "http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", ], ) go_repository( name = "org_golang_google_grpc", build_file_proto_mode = "disable_global", importpath = "google.golang.org/grpc", - sha256 = "1d49288986efd05a7b4ac6cee078f84e29e464678c776f25e09efddfba852fd1", - strip_prefix = "google.golang.org/grpc@v1.63.2", + sha256 = 
"beb1f50938450fddfe1d3cd713f4e573f08c59057fb5d12467e83da69da4bb7b", + strip_prefix = "google.golang.org/grpc@v1.64.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", - "http://ats.apps.svc/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", + "http://ats.apps.svc/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", ], ) go_repository( @@ -10235,13 +10430,13 @@ def go_deps(): name = "org_gonum_v1_gonum", build_file_proto_mode = "disable_global", importpath = "gonum.org/v1/gonum", - sha256 = "abdfee15ce7c9d2cd96b66468d3ae28d6054add4efbfc1b15fadfe3613f3d362", - strip_prefix = "gonum.org/v1/gonum@v0.11.0", + sha256 = "7a1b124a144b2c97a29829464d4b7258e04235c1fb14bbcea902086618414a43", + strip_prefix = "gonum.org/v1/gonum@v0.15.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.11.0.zip", - "http://ats.apps.svc/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.11.0.zip", - "https://cache.hawkingrei.com/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.11.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.11.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.15.1.zip", + "http://ats.apps.svc/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.15.1.zip", + 
"https://cache.hawkingrei.com/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.15.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/gonum.org/v1/gonum/org_gonum_v1_gonum-v0.15.1.zip", ], ) go_repository( @@ -10309,6 +10504,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/ccorpus/org_modernc_ccorpus-v1.11.6.zip", ], ) + go_repository( + name = "org_modernc_gc_v3", + build_file_proto_mode = "disable_global", + importpath = "modernc.org/gc/v3", + sha256 = "d533e65369fe0dfcd19c725226a30e47b184092603ec7ee38dd5d056a2ad1474", + strip_prefix = "modernc.org/gc/v3@v3.0.0-20240107210532-573471604cb6", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/gc/v3/org_modernc_gc_v3-v3.0.0-20240107210532-573471604cb6.zip", + "http://ats.apps.svc/gomod/modernc.org/gc/v3/org_modernc_gc_v3-v3.0.0-20240107210532-573471604cb6.zip", + "https://cache.hawkingrei.com/gomod/modernc.org/gc/v3/org_modernc_gc_v3-v3.0.0-20240107210532-573471604cb6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/gc/v3/org_modernc_gc_v3-v3.0.0-20240107210532-573471604cb6.zip", + ], + ) go_repository( name = "org_modernc_golex", build_file_proto_mode = "disable_global", @@ -10339,13 +10547,13 @@ def go_deps(): name = "org_modernc_libc", build_file_proto_mode = "disable_global", importpath = "modernc.org/libc", - sha256 = "5f98bedf9f0663b3b87555904ee41b82fe9d8e9ac5c47c9fac9a42a7fe232313", - strip_prefix = "modernc.org/libc@v1.22.2", + sha256 = "e7857dd7ac722f43be7f13a43db632f53b67067010416f330b8b80664af9b619", + strip_prefix = "modernc.org/libc@v1.41.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/libc/org_modernc_libc-v1.22.2.zip", - "http://ats.apps.svc/gomod/modernc.org/libc/org_modernc_libc-v1.22.2.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/libc/org_modernc_libc-v1.22.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/libc/org_modernc_libc-v1.22.2.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/modernc.org/libc/org_modernc_libc-v1.41.0.zip", + "http://ats.apps.svc/gomod/modernc.org/libc/org_modernc_libc-v1.41.0.zip", + "https://cache.hawkingrei.com/gomod/modernc.org/libc/org_modernc_libc-v1.41.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/libc/org_modernc_libc-v1.41.0.zip", ], ) go_repository( @@ -10365,13 +10573,13 @@ def go_deps(): name = "org_modernc_memory", build_file_proto_mode = "disable_global", importpath = "modernc.org/memory", - sha256 = "f79e8ada14c36d08817ee2bf6b2103f65c1a61a91b042b59016465869624043c", - strip_prefix = "modernc.org/memory@v1.5.0", + sha256 = "fb640d04a514b88f7d988e8b79b41f46e7958da5ea3e69505199c4059138d189", + strip_prefix = "modernc.org/memory@v1.7.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/memory/org_modernc_memory-v1.5.0.zip", - "http://ats.apps.svc/gomod/modernc.org/memory/org_modernc_memory-v1.5.0.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/memory/org_modernc_memory-v1.5.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/memory/org_modernc_memory-v1.5.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/memory/org_modernc_memory-v1.7.2.zip", + "http://ats.apps.svc/gomod/modernc.org/memory/org_modernc_memory-v1.7.2.zip", + "https://cache.hawkingrei.com/gomod/modernc.org/memory/org_modernc_memory-v1.7.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/memory/org_modernc_memory-v1.7.2.zip", ], ) go_repository( @@ -10417,13 +10625,13 @@ def go_deps(): name = "org_modernc_sqlite", build_file_proto_mode = "disable_global", importpath = "modernc.org/sqlite", - sha256 = "be0501f87458962a00c8fb07d1f131af010a534cd6ffb654c570be35b9b608d5", - strip_prefix = "modernc.org/sqlite@v1.18.2", + sha256 = "3add9bde5a932e2a52cd2b50a1be276332d91f196767a5be740afa3ee037afad", + strip_prefix = "modernc.org/sqlite@v1.29.6", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.18.2.zip", - "http://ats.apps.svc/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.18.2.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.18.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.18.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.29.6.zip", + "http://ats.apps.svc/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.29.6.zip", + "https://cache.hawkingrei.com/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.29.6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/sqlite/org_modernc_sqlite-v1.29.6.zip", ], ) go_repository( diff --git a/lightning/pkg/importer/BUILD.bazel b/lightning/pkg/importer/BUILD.bazel index 4f2413b0832a7..4211373437bf2 100644 --- a/lightning/pkg/importer/BUILD.bazel +++ b/lightning/pkg/importer/BUILD.bazel @@ -20,6 +20,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//br/pkg/errors", + "//br/pkg/membuf", "//br/pkg/pdutil", "//br/pkg/storage", "//br/pkg/streamhelper", @@ -74,6 +75,7 @@ go_library( "//pkg/util/engine", "//pkg/util/etcd", "//pkg/util/extsort", + "//pkg/util/memory", "//pkg/util/redact", "//pkg/util/regexpr-router", "//pkg/util/set", diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 381da027f3f29..904adad3e2bc4 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -39,12 +39,13 @@ go_library( "//pkg/util/sqlescape", "//pkg/util/table-filter", "//pkg/util/zeropool", + "@com_github_joechenrh_arrow_go_v18//arrow/memory:go_default_library", + "@com_github_joechenrh_arrow_go_v18//parquet:go_default_library", + "@com_github_joechenrh_arrow_go_v18//parquet/file:go_default_library", + "@com_github_joechenrh_arrow_go_v18//parquet/schema:go_default_library", "@com_github_pingcap_errors//:errors", 
"@com_github_pingcap_failpoint//:failpoint", "@com_github_spkg_bom//:bom", - "@com_github_xitongsys_parquet_go//parquet", - "@com_github_xitongsys_parquet_go//reader", - "@com_github_xitongsys_parquet_go//source", "@org_golang_x_sync//errgroup", "@org_golang_x_text//encoding", "@org_golang_x_text//encoding/charmap", From 7ff086caa6994282e6f9a12db0ed57ebf0d023ab Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 18 Dec 2024 13:14:39 +0800 Subject: [PATCH 16/93] Update go.mod --- DEPS.bzl | 36 ++++++++++++++++---------------- go.mod | 3 +-- go.sum | 4 ++-- pkg/lightning/mydump/BUILD.bazel | 8 +++---- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index d206e208ac85b..9a41cb3511824 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1100,13 +1100,13 @@ def go_deps(): name = "com_github_cncf_xds_go", build_file_proto_mode = "disable_global", importpath = "github.com/cncf/xds/go", - sha256 = "7395d4a588bcabf822f2347b647b66853a14a98088dd1ea0582cfa7a241c4234", - strip_prefix = "github.com/cncf/xds/go@v0.0.0-20240318125728-8a4994d93e50", + sha256 = "ab0d2fd980b15a582708a728cf8080ebb88778e59f3003b67c6aafaa9ad0f447", + strip_prefix = "github.com/cncf/xds/go@v0.0.0-20231128003011-0fa0005c9caa", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", - "http://ats.apps.svc/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", - "https://cache.hawkingrei.com/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20240318125728-8a4994d93e50.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", + 
"http://ats.apps.svc/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", + "https://cache.hawkingrei.com/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", ], ) go_repository( @@ -8012,13 +8012,13 @@ def go_deps(): name = "com_google_cloud_go_compute", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/compute", - sha256 = "5173a017a15f7874e68752a8116556fe0d7e5e11344dd4265c454467bb651cb8", - strip_prefix = "cloud.google.com/go/compute@v1.25.1", + sha256 = "0cf3d4325e378c92ff90cef3d1b7752682a77f0eaa0b11c092cc3ea32e5ed638", + strip_prefix = "cloud.google.com/go/compute@v1.24.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", ], ) go_repository( @@ -10157,13 +10157,13 @@ def go_deps(): name = "org_golang_google_grpc", build_file_proto_mode = "disable_global", importpath = "google.golang.org/grpc", - sha256 = "beb1f50938450fddfe1d3cd713f4e573f08c59057fb5d12467e83da69da4bb7b", - 
strip_prefix = "google.golang.org/grpc@v1.64.1", + sha256 = "1d49288986efd05a7b4ac6cee078f84e29e464678c776f25e09efddfba852fd1", + strip_prefix = "google.golang.org/grpc@v1.63.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", - "http://ats.apps.svc/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.64.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", + "http://ats.apps.svc/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/grpc/org_golang_google_grpc-v1.63.2.zip", ], ) go_repository( diff --git a/go.mod b/go.mod index 6b0b938a835cb..9117f73e8d890 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/YangKeao/go-mysql-driver v0.0.0-20240627104025-dd5589458cfa github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 - //github.com/joechenrh/arrow-go/v18 v18.0.0 github.com/apache/skywalking-eyes v0.4.0 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/ashanbrown/makezero v1.1.1 @@ -141,7 +140,7 @@ require ( golang.org/x/time v0.7.0 golang.org/x/tools v0.28.0 google.golang.org/api v0.169.0 - google.golang.org/grpc v1.64.1 + google.golang.org/grpc v1.63.2 gopkg.in/yaml.v2 v2.4.0 gorm.io/driver/mysql v1.5.7 gorm.io/gorm v1.25.11 diff --git a/go.sum b/go.sum index 8a1c13c5609b5..b6703cc97f237 100644 --- a/go.sum +++ b/go.sum @@ -1373,8 +1373,8 @@ google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTp google.golang.org/grpc v1.33.2/go.mod 
h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= -google.golang.org/grpc v1.64.1 h1:LKtvyfbX3UGVPFcGqJ9ItpVWW6oN/2XqTxfAnwRRXiA= -google.golang.org/grpc v1.64.1/go.mod h1:hiQF4LFZelK2WKaP6W0L92zGHtiQdZxk8CrSdvyjeP0= +google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= +google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 904adad3e2bc4..88fac57fe9c3c 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -39,10 +39,10 @@ go_library( "//pkg/util/sqlescape", "//pkg/util/table-filter", "//pkg/util/zeropool", - "@com_github_joechenrh_arrow_go_v18//arrow/memory:go_default_library", - "@com_github_joechenrh_arrow_go_v18//parquet:go_default_library", - "@com_github_joechenrh_arrow_go_v18//parquet/file:go_default_library", - "@com_github_joechenrh_arrow_go_v18//parquet/schema:go_default_library", + "@com_github_joechenrh_arrow_go_v18//arrow/memory", + "@com_github_joechenrh_arrow_go_v18//parquet", + "@com_github_joechenrh_arrow_go_v18//parquet/file", + "@com_github_joechenrh_arrow_go_v18//parquet/schema", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_spkg_bom//:bom", From bd3b5a8ecc74e7d789bf213a7c691e551258b017 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 19 Dec 2024 10:19:37 +0800 Subject: [PATCH 17/93] Add pprof for test 
--- lightning/cmd/tidb-lightning/main.go | 25 +- lightning/pkg/importer/table_import.go | 4 +- pkg/lightning/mydump/parquet_parser.go | 301 +++++++++++++------------ 3 files changed, 180 insertions(+), 150 deletions(-) diff --git a/lightning/cmd/tidb-lightning/main.go b/lightning/cmd/tidb-lightning/main.go index d68d6033acc4f..c5e2f24131f18 100644 --- a/lightning/cmd/tidb-lightning/main.go +++ b/lightning/cmd/tidb-lightning/main.go @@ -20,6 +20,7 @@ import ( "os" "os/signal" "runtime/debug" + "runtime/pprof" "syscall" "github.com/pingcap/tidb/lightning/pkg/server" @@ -29,9 +30,31 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" + + "net/http" + _ "net/http/pprof" ) func main() { + go func() { + http.ListenAndServe("0.0.0.0:8899", nil) + }() + + // Create a memory profile file + f, err := os.Create("mem.pprof") + if err != nil { + fmt.Println("Failed to create memory profile file:", err) + return + } + defer f.Close() + + // Start the memory profile + if err := pprof.StartCPUProfile(f); err != nil { + fmt.Println("Failed to start memory profile:", err) + return + } + defer pprof.StopCPUProfile() + globalCfg := config.Must(config.LoadGlobalConfig(os.Args[1:], nil)) logToFile := globalCfg.App.File != "" && globalCfg.App.File != "-" if logToFile { @@ -75,7 +98,7 @@ func main() { } } - err := app.GoServe() + err = app.GoServe() if err != nil { logger.Error("failed to start HTTP server", zap.Error(err)) fmt.Fprintln(os.Stderr, "failed to start HTTP server:", err) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 52dcc3d38257f..d6198da4ac267 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -801,7 +801,9 @@ ChunkLoop: // Limit the concurrency of parquet reader using estimated memory usage. 
if chunk.FileMeta.Type == mydump.SourceTypeParquet { - // To avoid OOM during file opening, we update the waterline before reading. + // To avoid OOM during file opening, + // we have to ensure that we have enough memory budget before reading. + // Here we use the maximum memory usage we have ever seen as the memory usage estimation. memLimiter.Acquire(maxMemoryUsage) pp := cr.parser.(*mydump.ParquetParser) if _, err := pp.ReadRows(64); err != nil { diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 5c1e137d45059..4ec40950fdf20 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -24,15 +24,14 @@ import ( "sync/atomic" "time" - "github.com/pingcap/errors" - "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/pkg/lightning/log" - "github.com/pingcap/tidb/pkg/types" - "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/joechenrh/arrow-go/v18/parquet" "github.com/joechenrh/arrow-go/v18/parquet/file" "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/pingcap/errors" + "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/types" ) const ( @@ -70,7 +69,7 @@ func (a *allocatorWithStats) Allocated() int64 { return a.allocated.Load() } -type Dumper struct { +type columnDumper struct { reader file.ColumnChunkReader batchSize int64 valueOffset int @@ -81,13 +80,13 @@ type Dumper struct { defLevels []int16 repLevels []int16 - valueBuffer interface{} + valueBuffer any } -func createDumper(tp parquet.Type) *Dumper { +func createcolumnDumper(tp parquet.Type) *columnDumper { batchSize := 128 - var valueBuffer interface{} + var valueBuffer any switch tp { case parquet.Types.Boolean: valueBuffer = make([]bool, batchSize) @@ -107,7 +106,7 @@ func createDumper(tp parquet.Type) *Dumper { valueBuffer = make([]parquet.FixedLenByteArray, batchSize) } - return &Dumper{ + return &columnDumper{ 
batchSize: int64(batchSize), defLevels: make([]int16, batchSize), repLevels: make([]int16, batchSize), @@ -115,41 +114,41 @@ func createDumper(tp parquet.Type) *Dumper { } } -func (dump *Dumper) Type() parquet.Type { +func (dump *columnDumper) Type() parquet.Type { return dump.reader.Type() } -func (dump *Dumper) SetReader(colReader file.ColumnChunkReader) { +func (dump *columnDumper) SetReader(colReader file.ColumnChunkReader) { dump.reader = colReader dump.valueOffset = 0 dump.levelOffset = 0 } -func (dump *Dumper) readNextBatch(req int64) int { +func (dump *columnDumper) readNextBatch(req int64) int { switch reader := dump.reader.(type) { case *file.BooleanColumnChunkReader: - values := dump.valueBuffer.([]bool) + values, _ := dump.valueBuffer.([]bool) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int32ColumnChunkReader: - values := dump.valueBuffer.([]int32) + values, _ := dump.valueBuffer.([]int32) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int64ColumnChunkReader: - values := dump.valueBuffer.([]int64) + values, _ := dump.valueBuffer.([]int64) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Float32ColumnChunkReader: - values := dump.valueBuffer.([]float32) + values, _ := dump.valueBuffer.([]float32) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Float64ColumnChunkReader: - values := dump.valueBuffer.([]float64) + values, _ := dump.valueBuffer.([]float64) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.Int96ColumnChunkReader: - values := dump.valueBuffer.([]parquet.Int96) + values, _ := dump.valueBuffer.([]parquet.Int96) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, 
dump.defLevels, dump.repLevels) case *file.ByteArrayColumnChunkReader: - values := dump.valueBuffer.([]parquet.ByteArray) + values, _ := dump.valueBuffer.([]parquet.ByteArray) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) case *file.FixedLenByteArrayColumnChunkReader: - values := dump.valueBuffer.([]parquet.FixedLenByteArray) + values, _ := dump.valueBuffer.([]parquet.FixedLenByteArray) dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) } @@ -326,7 +325,7 @@ type ParquetParser struct { // colBuffers is used to store raw data read from parquet columns. // rows stores the actual data after parsing. - dumpers []*Dumper + dumpers []*columnDumper rows [][]types.Datum // curIdx and avail is the current index and total number of rows in rows buffer @@ -346,103 +345,108 @@ type ParquetParser struct { } // GetMemoryUage estimate the memory usage for this file. -func (p *ParquetParser) GetMemoryUage() int64 { +func (pp *ParquetParser) GetMemoryUage() int64 { // The reason for multiplying by six is as follows: // 1. The file reader requires a buffer to accommodate at least one data page. // 2. The page reader needs two buffers: one for storing compressed data and another for uncompressed data. // 3. Only the uncompressed data will be recorded in the parser. // 4. When moving to the next row group, we may allocate three additional buffers. // Therefore, we multiply the memory usage by six to estimate the memory usage. 
- return p.alloc.Allocated() * 6 + return pp.alloc.Allocated() * 6 } -func (p *ParquetParser) setStringData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]parquet.ByteArray) +func (pp *ParquetParser) setStringData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetString(buf[i].String(), "utf8mb4_bin") + pp.rows[offset+i][col].SetString(buf[i].String(), "utf8mb4_bin") } } -func (p *ParquetParser) setInt32Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int32) +func (pp *ParquetParser) setInt32Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetInt64(int64(buf[i])) + pp.rows[offset+i][col].SetInt64(int64(buf[i])) } } -func (p *ParquetParser) setUint32Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int64) +func (pp *ParquetParser) setUint32Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetUint64(uint64(buf[i])) + pp.rows[offset+i][col].SetUint64(uint64(buf[i])) } } -func (p *ParquetParser) setInt64Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int64) +func (pp *ParquetParser) setInt64Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetInt64(int64(buf[i])) + pp.rows[offset+i][col].SetInt64(int64(buf[i])) } } -func (p *ParquetParser) setUint64Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int64) +func (pp *ParquetParser) setUint64Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetUint64(uint64(buf[i])) + pp.rows[offset+i][col].SetUint64(uint64(buf[i])) } } -func (p *ParquetParser) setTimeMillisData(readNum, 
col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int32) +func (pp *ParquetParser) setTimeMillisData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { timeStr := formatTime(int64(buf[i]), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) - p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } -func (p *ParquetParser) setTimeMicrosData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int32) +func (pp *ParquetParser) setTimeMicrosData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) - p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } -func (p *ParquetParser) setTimestampMillisData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int64) +func (pp *ParquetParser) setTimestampMillisData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout, true) - p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } -func (p *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int64) +func (pp *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout, true) - p.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") + pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") } } -func (p *ParquetParser) setDateData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]int32) +func (pp *ParquetParser) 
setDateData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]int32) for i := 0; i < readNum; i++ { dateStr := time.Unix(int64(buf[i])*86400, 0).Format(time.DateOnly) - p.rows[offset+i][col].SetString(dateStr, "utf8mb4_bin") + pp.rows[offset+i][col].SetString(dateStr, "utf8mb4_bin") } } -func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { - colTp := p.dumpers[col].Type() - decimal := p.colMetas[col].decimalMeta +func (pp *ParquetParser) setDecimalData(readNum, col, offset int) error { + colTp := pp.dumpers[col].Type() + decimal := pp.colMetas[col].decimalMeta + + int32buf, _ := pp.dumpers[col].valueBuffer.([]int32) + int64buf, _ := pp.dumpers[col].valueBuffer.([]int64) + fixBuf, _ := pp.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) + byteBuf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { - v := p.dumpers[col].valueBuffer.([]int64)[i] + v := int64buf[i] if colTp == parquet.Types.Int32 { - v = int64(p.dumpers[col].valueBuffer.([]int32)[i]) + v = int64(int32buf[i]) } if !decimal.IsSet || decimal.Scale == 0 { - p.rows[offset+i][col].SetInt64(v) + pp.rows[offset+i][col].SetInt64(v) continue } minLen := decimal.Scale + 1 @@ -451,58 +455,58 @@ func (p *ParquetParser) setDecimalData(readNum, col, offset int) error { } val := fmt.Sprintf("%0*d", minLen, v) dotIndex := len(val) - int(decimal.Scale) - p.rows[offset+i][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") + pp.rows[offset+i][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") } else if colTp == parquet.Types.FixedLenByteArray { - s := binaryToDecimalStr(p.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray)[i], int(decimal.Scale)) - p.rows[offset+i][col].SetString(s, "utf8mb4_bin") + s := binaryToDecimalStr(fixBuf[i], int(decimal.Scale)) + pp.rows[offset+i][col].SetString(s, "utf8mb4_bin") } else { - s := 
binaryToDecimalStr(p.dumpers[col].valueBuffer.([]parquet.ByteArray)[i], int(decimal.Scale)) - p.rows[offset+i][col].SetString(s, "utf8mb4_bin") + s := binaryToDecimalStr(byteBuf[i], int(decimal.Scale)) + pp.rows[offset+i][col].SetString(s, "utf8mb4_bin") } } return nil } -func (p *ParquetParser) setBoolData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]bool) +func (pp *ParquetParser) setBoolData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]bool) for i := 0; i < readNum; i++ { if buf[i] { - p.rows[offset+i][col].SetUint64(1) + pp.rows[offset+i][col].SetUint64(1) } else { - p.rows[offset+i][col].SetUint64(0) + pp.rows[offset+i][col].SetUint64(0) } } } -func (p *ParquetParser) setFloat32Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]float32) +func (pp *ParquetParser) setFloat32Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]float32) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetFloat32(buf[i]) + pp.rows[offset+i][col].SetFloat32(buf[i]) } } -func (p *ParquetParser) setFloat64Data(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]float64) +func (pp *ParquetParser) setFloat64Data(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]float64) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetFloat64(buf[i]) + pp.rows[offset+i][col].SetFloat64(buf[i]) } } -func (p *ParquetParser) setFixedByteArrayData(readNum, col, offset int) { - buf := p.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) +func (pp *ParquetParser) setFixedByteArrayData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") + pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") } } -func (p *ParquetParser) setByteArrayData(readNum, col, offset int) { - buf := 
p.dumpers[col].valueBuffer.([]parquet.ByteArray) +func (pp *ParquetParser) setByteArrayData(readNum, col, offset int) { + buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") + pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") } } -func (p *ParquetParser) setInt96Data(readNum, col, offset int) { +func (pp *ParquetParser) setInt96Data(readNum, col, offset int) { // FYI: https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 // INT96 timestamp layout // -------------------------- @@ -513,37 +517,37 @@ func (p *ParquetParser) setInt96Data(readNum, col, offset int) { // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, // where dt is a negative number but still legal in the context of Go. // But it will cause errors or potential data inconsistency when importing. 
- buf := p.dumpers[col].valueBuffer.([]parquet.Int96) + buf, _ := pp.dumpers[col].valueBuffer.([]parquet.Int96) for i := 0; i < readNum; i++ { - p.rows[offset+i][col].SetString(buf[i].ToTime().Format(utcTimeLayout), "utf8mb4_bin") + pp.rows[offset+i][col].SetString(buf[i].ToTime().Format(utcTimeLayout), "utf8mb4_bin") } } // Init initializes the Parquet parser and allocate necessary buffers -func (p *ParquetParser) Init() error { - meta := p.readers[0].MetaData() +func (pp *ParquetParser) Init() error { + meta := pp.readers[0].MetaData() - p.curRowGroup, p.totalRowGroup = -1, p.readers[0].NumRowGroups() + pp.curRowGroup, pp.totalRowGroup = -1, pp.readers[0].NumRowGroups() - p.totalRows = int(meta.NumRows) + pp.totalRows = int(meta.NumRows) numCols := meta.Schema.NumColumns() - p.rows = make([][]types.Datum, defaultBatchSize) - for i := range p.rows { - p.rows[i] = make([]types.Datum, numCols) + pp.rows = make([][]types.Datum, defaultBatchSize) + for i := range pp.rows { + pp.rows[i] = make([]types.Datum, numCols) } - p.dumpers = make([]*Dumper, numCols) + pp.dumpers = make([]*columnDumper, numCols) for i := 0; i < numCols; i++ { - p.dumpers[i] = createDumper(meta.Schema.Column(i).PhysicalType()) + pp.dumpers[i] = createcolumnDumper(meta.Schema.Column(i).PhysicalType()) } return nil } // ReadRows read several rows internally and store them in the row buffer. 
-func (p *ParquetParser) ReadRows(num int) (int, error) { - readNum := min(num, p.totalRows-p.curRows) +func (pp *ParquetParser) ReadRows(num int) (int, error) { + readNum := min(num, pp.totalRows-pp.curRows) if readNum == 0 { return 0, nil } @@ -551,95 +555,95 @@ func (p *ParquetParser) ReadRows(num int) (int, error) { read := 0 for read < readNum { // Move to next row group - if p.curRowInGroup == p.totalRowsInGroup { - p.curRowGroup++ - for c := 0; c < len(p.dumpers); c++ { - rowGroupReader := p.readers[c].RowGroup(p.curRowGroup) + if pp.curRowInGroup == pp.totalRowsInGroup { + pp.curRowGroup++ + for c := 0; c < len(pp.dumpers); c++ { + rowGroupReader := pp.readers[c].RowGroup(pp.curRowGroup) colReader, err := rowGroupReader.Column(c) if err != nil { return 0, errors.Trace(err) } - p.dumpers[c].SetReader(colReader) + pp.dumpers[c].SetReader(colReader) } - p.curRowInGroup, p.totalRowsInGroup = 0, int(p.readers[0].MetaData().RowGroups[p.curRowGroup].NumRows) + pp.curRowInGroup, pp.totalRowsInGroup = 0, int(pp.readers[0].MetaData().RowGroups[pp.curRowGroup].NumRows) } // Read in this group - curRead := min(readNum-read, p.totalRowsInGroup-p.curRowInGroup) - _, err := p.readInGroup(curRead, read) + curRead := min(readNum-read, pp.totalRowsInGroup-pp.curRowInGroup) + _, err := pp.readInGroup(curRead, read) if err != nil { return 0, errors.Trace(err) } read += curRead - p.curRowInGroup += curRead + pp.curRowInGroup += curRead } - p.curRows += readNum - p.curIdx, p.avail = 0, readNum + pp.curRows += readNum + pp.curIdx, pp.avail = 0, readNum return readNum, nil } // readInGroup read severals rows in current row group. // storeOffset represents the starting position for storing the read rows. // It's a part of the ReadRows. 
-func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { +func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { var ( err error total int ) // Read data into buffers first - for i, dumper := range p.dumpers { + for i, dumper := range pp.dumpers { total = dumper.readNextBatch(int64(num)) - meta := p.colMetas[i] + meta := pp.colMetas[i] physicalTp := dumper.Type() // If we can't get converted type, just use physical type if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { switch physicalTp { case parquet.Types.Boolean: - p.setBoolData(num, i, storeOffset) + pp.setBoolData(num, i, storeOffset) case parquet.Types.Int32: - p.setInt32Data(num, i, storeOffset) + pp.setInt32Data(num, i, storeOffset) case parquet.Types.Int64: - p.setInt64Data(num, i, storeOffset) + pp.setInt64Data(num, i, storeOffset) case parquet.Types.Int96: - p.setInt96Data(num, i, storeOffset) + pp.setInt96Data(num, i, storeOffset) case parquet.Types.Float: - p.setFloat32Data(num, i, storeOffset) + pp.setFloat32Data(num, i, storeOffset) case parquet.Types.Double: - p.setFloat64Data(num, i, storeOffset) + pp.setFloat64Data(num, i, storeOffset) case parquet.Types.ByteArray: - p.setByteArrayData(num, i, storeOffset) + pp.setByteArrayData(num, i, storeOffset) case parquet.Types.FixedLenByteArray: - p.setFixedByteArrayData(num, i, storeOffset) + pp.setFixedByteArrayData(num, i, storeOffset) } continue } switch meta.converted { case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: - p.setStringData(num, i, storeOffset) + pp.setStringData(num, i, storeOffset) case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: - p.setInt32Data(num, i, storeOffset) + pp.setInt32Data(num, i, storeOffset) case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: - p.setUint32Data(num, 
i, storeOffset) + pp.setUint32Data(num, i, storeOffset) case schema.ConvertedTypes.Int64: - p.setInt64Data(num, i, storeOffset) + pp.setInt64Data(num, i, storeOffset) case schema.ConvertedTypes.Uint64: - p.setUint64Data(num, i, storeOffset) + pp.setUint64Data(num, i, storeOffset) case schema.ConvertedTypes.TimeMillis: - p.setTimeMillisData(num, i, storeOffset) + pp.setTimeMillisData(num, i, storeOffset) case schema.ConvertedTypes.TimeMicros: - p.setTimeMicrosData(num, i, storeOffset) + pp.setTimeMicrosData(num, i, storeOffset) case schema.ConvertedTypes.TimestampMillis: - p.setTimestampMillisData(num, i, storeOffset) + pp.setTimestampMillisData(num, i, storeOffset) case schema.ConvertedTypes.TimestampMicros: - p.setTimestampMicrosData(num, i, storeOffset) + pp.setTimestampMicrosData(num, i, storeOffset) case schema.ConvertedTypes.Date: - p.setDateData(num, i, storeOffset) + pp.setDateData(num, i, storeOffset) case schema.ConvertedTypes.Decimal: - p.setDecimalData(num, i, storeOffset) + err = pp.setDecimalData(num, i, storeOffset) } } @@ -647,23 +651,23 @@ func (p *ParquetParser) readInGroup(num, storeOffset int) (int, error) { } // Pos returns the currently row number of the parquet file -func (p *ParquetParser) Pos() (pos int64, rowID int64) { - return int64(p.curRows - p.avail + p.curIdx), p.lastRow.RowID +func (pp *ParquetParser) Pos() (pos int64, rowID int64) { + return int64(pp.curRows - pp.avail + pp.curIdx), pp.lastRow.RowID } // SetPos implements the Parser interface. 
// For parquet file, this interface will read and discard the first `pos` rows, // and set the current row ID to `rowID` -func (p *ParquetParser) SetPos(pos int64, rowID int64) error { - p.lastRow.RowID = rowID - if pos < int64(p.curRows) { +func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { + pp.lastRow.RowID = rowID + if pos < int64(pp.curRows) { panic("don't support seek back yet") } // Read and discard these rows - read := int(pos) - p.curRows - _, err := p.ReadRows(read) - p.curIdx, p.avail = 0, 0 + read := int(pos) - pp.curRows + _, err := pp.ReadRows(read) + pp.curIdx, pp.avail = 0, 0 return errors.Trace(err) } @@ -688,9 +692,9 @@ func (pp *ParquetParser) Close() error { // GetRow get the the current row. // Return error if can't read next row. // User should call ReadRow before calling this. -func (p *ParquetParser) GetRow() ([]types.Datum, error) { - if p.curIdx >= p.avail { - read, err := p.ReadRows(defaultBatchSize) +func (pp *ParquetParser) GetRow() ([]types.Datum, error) { + if pp.curIdx >= pp.avail { + read, err := pp.ReadRows(defaultBatchSize) if err != nil { return nil, errors.Trace(err) } @@ -699,26 +703,26 @@ func (p *ParquetParser) GetRow() ([]types.Datum, error) { } } - row := p.rows[p.curIdx] - p.curIdx++ + row := pp.rows[pp.curIdx] + pp.curIdx++ return row, nil } // ReadRow reads a row in the parquet file by the parser. // It implements the Parser interface. // Return io.EOF if reaching the end of the file. 
-func (p *ParquetParser) ReadRow() error { - p.lastRow.RowID++ - p.lastRow.Length = 0 - row, err := p.GetRow() +func (pp *ParquetParser) ReadRow() error { + pp.lastRow.RowID++ + pp.lastRow.Length = 0 + row, err := pp.GetRow() if err != nil { return errors.Trace(err) } if row == nil { return io.EOF } - p.lastRow.Row = row - p.lastRow.Length = 0 + pp.lastRow.Row = row + pp.lastRow.Length = 0 return nil } @@ -860,7 +864,8 @@ func NewParquetParser( columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() } else { columnMetas[i].converted = desc.ConvertedType() - columnMetas[i].decimalMeta = desc.SchemaNode().(*schema.PrimitiveNode).DecimalMetadata() + pnode, _ := desc.SchemaNode().(*schema.PrimitiveNode) + columnMetas[i].decimalMeta = pnode.DecimalMetadata() } } From 494baad301fa6f4bdf36ae1829d495eb851ef1f6 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 24 Dec 2024 10:06:04 +0800 Subject: [PATCH 18/93] [Test Only] output heap usage --- lightning/cmd/tidb-lightning/main.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lightning/cmd/tidb-lightning/main.go b/lightning/cmd/tidb-lightning/main.go index c5e2f24131f18..3166a2280489d 100644 --- a/lightning/cmd/tidb-lightning/main.go +++ b/lightning/cmd/tidb-lightning/main.go @@ -19,9 +19,11 @@ import ( "fmt" "os" "os/signal" + "runtime" "runtime/debug" "runtime/pprof" "syscall" + "time" "github.com/pingcap/tidb/lightning/pkg/server" "github.com/pingcap/tidb/lightning/pkg/web" @@ -35,6 +37,24 @@ import ( _ "net/http/pprof" ) +func bToMb(b uint64) uint64 { + return b / (1024 * 1024) +} + +func TrackSysMemUsage(ctx context.Context) { + tick := time.NewTicker(3 * time.Second) + for { + select { + case <-ctx.Done(): + return + case <-tick.C: + var m runtime.MemStats + runtime.ReadMemStats(&m) + fmt.Printf("HeapInUse = %v MiB, HeapAlloc = %v MiB\n", bToMb(m.HeapInuse), bToMb(m.HeapAlloc)) + } + } +} + func main() { go func() { http.ListenAndServe("0.0.0.0:8899", nil) 
@@ -55,6 +75,11 @@ func main() { } defer pprof.StopCPUProfile() + // Track heap in use + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go TrackSysMemUsage(ctx) + globalCfg := config.Must(config.LoadGlobalConfig(os.Args[1:], nil)) logToFile := globalCfg.App.File != "" && globalCfg.App.File != "-" if logToFile { From 15f4aa0b573d5d374d1c82864b6be470e550a035 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 26 Dec 2024 14:54:37 +0800 Subject: [PATCH 19/93] Fix read from S3 --- pkg/lightning/mydump/parquet_parser.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 4ec40950fdf20..9f1cc8e317c92 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -279,9 +279,17 @@ func (pf *parquetFileWrapper) ReadAt(p []byte, off int64) (int, error) { } } - n, err := pf.Read(p) - pf.lastOff = off + int64(n) - return n, err + read := 0 + for read < len(p) { + n, err := pf.Read(p[read:]) + read += n + pf.lastOff = off + int64(n) + if err != nil { + return read, err + } + } + + return read, nil } // Seek implemement Seeker interface From 1dc2e9427d6deb331cb4be388af9eadf0ddd154c Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 27 Dec 2024 18:15:28 +0800 Subject: [PATCH 20/93] Update --- go.mod | 2 +- go.sum | 4 +- lightning/pkg/importer/chunk_process.go | 7 +- lightning/pkg/importer/chunk_process_test.go | 6 +- lightning/pkg/importer/dup_detect.go | 2 +- lightning/pkg/importer/get_pre_info.go | 4 +- lightning/pkg/importer/table_import.go | 26 ++- pkg/executor/importer/import.go | 1 + pkg/lightning/mydump/loader.go | 33 ++-- pkg/lightning/mydump/loader_test.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 170 +++++++++++++++++-- 11 files changed, 207 insertions(+), 50 deletions(-) diff --git a/go.mod b/go.mod index e117ed2f4073d..7c7db5fd8d98c 100644 --- a/go.mod +++ b/go.mod @@ -151,7 
+151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d +require github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113 require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index 28013552f593e..801f397b74391 100644 --- a/go.sum +++ b/go.sum @@ -510,8 +510,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d h1:1uBoAYSaHFH+dOAGmcDlLEuV5zoCbm48q128LRD/fH8= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241216023057-f9949aab8c2d/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113 h1:gaUhamPZuxAcha0gbbg/iN4BO3nLK2OnyQHb1wzrtRo= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index 40597bac3fab4..a0ab3ddb6205c 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -20,6 +20,7 @@ import ( "io" "time" + pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" 
"github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/membuf" @@ -66,8 +67,9 @@ func newChunkProcessor( ioWorkers *worker.Pool, store storage.ExternalStorage, tableInfo *model.TableInfo, + allocator pmemory.Allocator, ) (*chunkProcessor, error) { - parser, err := openParser(ctx, cfg, chunk, ioWorkers, store, tableInfo) + parser, err := openParser(ctx, cfg, chunk, ioWorkers, store, tableInfo, allocator) if err != nil { return nil, err } @@ -85,6 +87,7 @@ func openParser( ioWorkers *worker.Pool, store storage.ExternalStorage, tblInfo *model.TableInfo, + allocator pmemory.Allocator, ) (mydump.Parser, error) { blockBufSize := int64(cfg.Mydumper.ReadBlockSize) reader, err := mydump.OpenReader(ctx, &chunk.FileMeta, store, storage.DecompressConfig{ @@ -110,7 +113,7 @@ func openParser( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, cfg.TiDB.SQLMode, reader, blockBufSize, ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, store, reader, chunk.FileMeta.Path) + parser, err = mydump.NewParquetParser(ctx, store, reader, allocator, chunk.FileMeta.Path) if err != nil { return nil, err } diff --git a/lightning/pkg/importer/chunk_process_test.go b/lightning/pkg/importer/chunk_process_test.go index 32afc724e95ea..7d17154caaa9a 100644 --- a/lightning/pkg/importer/chunk_process_test.go +++ b/lightning/pkg/importer/chunk_process_test.go @@ -86,7 +86,7 @@ func (s *chunkRestoreSuite) SetupTest() { } var err error - s.cr, err = newChunkProcessor(context.Background(), 1, s.cfg, &chunk, w, s.store, nil) + s.cr, err = newChunkProcessor(context.Background(), 1, s.cfg, &chunk, w, s.store, nil, nil) require.NoError(s.T(), err) } @@ -768,7 +768,7 @@ func TestCompressChunkRestore(t *testing.T) { cfg.App.TableConcurrency = 2 cfg.Mydumper.CSV.Header = false - cr, err := newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil) + cr, err := newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil, nil) 
require.NoError(t, err) var ( id, lastID int @@ -800,7 +800,7 @@ func TestCompressChunkRestore(t *testing.T) { RowIDMax: 100, }, } - cr, err = newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil) + cr, err = newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil, nil) require.NoError(t, err) for id = lastID; id < 300; { err = cr.parser.ReadRow() diff --git a/lightning/pkg/importer/dup_detect.go b/lightning/pkg/importer/dup_detect.go index e5167246a93d9..a1115fb36c7e3 100644 --- a/lightning/pkg/importer/dup_detect.go +++ b/lightning/pkg/importer/dup_detect.go @@ -203,7 +203,7 @@ func (d *dupDetector) addKeysByChunk( adder *duplicate.KeyAdder, chunk *checkpoints.ChunkCheckpoint, ) error { - parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core) + parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core, nil) if err != nil { return err } diff --git a/lightning/pkg/importer/get_pre_info.go b/lightning/pkg/importer/get_pre_info.go index 77c6d6558c8e4..a1565389fd058 100644 --- a/lightning/pkg/importer/get_pre_info.go +++ b/lightning/pkg/importer/get_pre_info.go @@ -489,7 +489,7 @@ func (p *PreImportInfoGetterImpl) ReadFirstNRowsByFileMeta(ctx context.Context, case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, dataFileMeta.Path) + parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, nil, dataFileMeta.Path) if err != nil { return nil, nil, errors.Trace(err) } @@ -659,7 +659,7 @@ func (p *PreImportInfoGetterImpl) sampleDataFromTable( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, sampleFile.Path) + parser, err = mydump.NewParquetParser(ctx, p.srcStorage, 
reader, nil, sampleFile.Path) if err != nil { return 0.0, false, errors.Trace(err) } diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index d6198da4ac267..41b52d4c262d9 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -28,6 +28,7 @@ import ( "time" dmysql "github.com/go-sql-driver/mysql" + pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/membuf" @@ -63,6 +64,8 @@ import ( ) var memLimiter *membuf.Limiter +var memoryForAllocator int +var memoryAllocator pmemory.Allocator func init() { memTotal, err := memory.MemTotal() @@ -71,7 +74,11 @@ func init() { memTotal = math.MaxInt32 } // TODO(joechenrh): set a more proper waterline - memLimiter = membuf.NewLimiter(int(memTotal / 5 * 4)) + memoryForAllocator = int(memTotal / 2) + memLimiter = membuf.NewLimiter(memoryForAllocator) + allocator := &pmemory.BuddyAllocator{} + allocator.Init(memoryForAllocator) + memoryAllocator = allocator } // TableImporter is a helper struct to import a table. @@ -724,8 +731,6 @@ func (tr *TableImporter) preprocessEngine( metrics, _ := metric.FromContext(ctx) - maxMemoryUsage := 0 - // Restore table data ChunkLoop: for chunkIndex, chunk := range cp.Chunks { @@ -793,7 +798,7 @@ ChunkLoop: setError(err) break } - cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) + cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core, memoryAllocator) if err != nil { setError(err) break @@ -801,21 +806,10 @@ ChunkLoop: // Limit the concurrency of parquet reader using estimated memory usage. if chunk.FileMeta.Type == mydump.SourceTypeParquet { - // To avoid OOM during file opening, - // we have to ensure that we have enough memory budget before reading. 
- // Here we use the maximum memory usage we have ever seen as the memory usage estimation. - memLimiter.Acquire(maxMemoryUsage) - pp := cr.parser.(*mydump.ParquetParser) - if _, err := pp.ReadRows(64); err != nil { - return nil, errors.Trace(err) - } - memoryUsage := int(pp.GetMemoryUage()) - memLimiter.Release(maxMemoryUsage) - + memoryUsage := tr.tableMeta.DataFiles[0].FileMeta.MemoryUsage memLimiter.Acquire(memoryUsage) cr.memLimiter = memLimiter cr.memoryUsage = memoryUsage - maxMemoryUsage = max(maxMemoryUsage, memoryUsage) } restoreWorker := rc.regionWorkers.Apply() diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 6ef74ba91f0a3..f30eb02affb14 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1229,6 +1229,7 @@ func (e *LoadDataController) GetParser( ctx, e.dataStore, reader, + nil, dataFileInfo.Remote.Path, ) } diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index ff6ed08d7e74f..7a95529f10310 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -100,8 +100,9 @@ type SourceFileMeta struct { // If the file is compressed, RealSize is the estimated uncompressed size. // If the file is parquet, RealSize is the estimated data size after convert // to row oriented storage. - RealSize int64 - Rows int64 // only for parquet + RealSize int64 + Rows int64 // only for parquet + MemoryUsage int // only for parquet } // NewMDTableMeta creates an Mydumper table meta with specified character set. @@ -246,7 +247,8 @@ type mdLoaderSetup struct { tableIndexMap map[filter.Table]int setupCfg *MDLoaderSetupConfig - sampledParquetRowSizes map[string]float64 + sampledParquetRowSizes map[string]float64 + sampledParquetMemoryUsage map[string]int } // NewLoader constructs a MyDumper loader that scanns the data source and constructs a set of metadatas. 
@@ -324,7 +326,8 @@ func NewLoaderWithStore(ctx context.Context, cfg LoaderConfig, tableIndexMap: make(map[filter.Table]int), setupCfg: mdLoaderSetupCfg, - sampledParquetRowSizes: make(map[string]float64), + sampledParquetRowSizes: make(map[string]float64), + sampledParquetMemoryUsage: make(map[string]int), } if err := setup.setup(ctx); err != nil { @@ -540,7 +543,8 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size case SourceTypeParquet: tableName := info.TableName.String() if s.sampledParquetRowSizes[tableName] == 0 { - s.sampledParquetRowSizes[tableName], err = SampleParquetRowSize(ctx, info.FileMeta, s.loader.GetStore()) + s.sampledParquetRowSizes[tableName], s.sampledParquetMemoryUsage[tableName], err = + SampleParquetRowSize(ctx, info.FileMeta, s.loader.GetStore()) if err != nil { logger.Error("fail to sample parquet row size", zap.String("category", "loader"), zap.String("schema", res.Schema), zap.String("table", res.Name), @@ -561,6 +565,7 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size if m, ok := metric.FromContext(ctx); ok { m.RowsCounter.WithLabelValues(metric.StateTotalRestore, tableName).Add(float64(totalRowCount)) } + info.FileMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] } s.tableDatas = append(s.tableDatas, info) } @@ -847,21 +852,21 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store } // SampleParquetRowSize samples row size of the parquet file. 
-func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (float64, error) { +func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (float64, int, error) { totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta) if totalRowCount == 0 || err != nil { - return 0, err + return 0, 0, err } reader, err := store.Open(ctx, fileMeta.Path, nil) if err != nil { - return 0, err + return 0, 0, err } - parser, err := NewParquetParser(ctx, store, reader, fileMeta.Path) + parser, err := NewParquetParserForSampling(ctx, store, reader, fileMeta.Path) if err != nil { //nolint: errcheck reader.Close() - return 0, err + return 0, 0, err } //nolint: errcheck defer parser.Close() @@ -876,7 +881,7 @@ func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store st if errors.Cause(err) == io.EOF { break } - return 0, err + return 0, 0, err } lastRow := parser.LastRow() rowCount++ @@ -886,5 +891,9 @@ func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store st break } } - return float64(rowSize) / float64(rowCount), nil + + avgRowSize := float64(rowSize) / float64(rowCount) + memoryUsage := parser.GetMemoryUage() + + return avgRowSize, memoryUsage, nil } diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 928b842725309..9686c55621f4c 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -1159,7 +1159,7 @@ func testSampleParquetDataSize(t *testing.T, count int) { err = store.WriteFile(ctx, fileName, bf.Bytes()) require.NoError(t, err) - rowSize, err := md.SampleParquetRowSize(ctx, md.SourceFileMeta{ + rowSize, _, err := md.SampleParquetRowSize(ctx, md.SourceFileMeta{ Path: fileName, }, store) require.NoError(t, err) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 9f1cc8e317c92..23f618ee8d869 100644 --- 
a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -329,7 +329,7 @@ type ParquetParser struct { colMetas []convertedType columnNames []string - alloc *allocatorWithStats + alloc memory.Allocator // colBuffers is used to store raw data read from parquet columns. // rows stores the actual data after parsing. @@ -353,14 +353,48 @@ type ParquetParser struct { } // GetMemoryUage estimate the memory usage for this file. -func (pp *ParquetParser) GetMemoryUage() int64 { - // The reason for multiplying by six is as follows: - // 1. The file reader requires a buffer to accommodate at least one data page. - // 2. The page reader needs two buffers: one for storing compressed data and another for uncompressed data. - // 3. Only the uncompressed data will be recorded in the parser. - // 4. When moving to the next row group, we may allocate three additional buffers. - // Therefore, we multiply the memory usage by six to estimate the memory usage. - return pp.alloc.Allocated() * 6 +func (pp *ParquetParser) GetMemoryUage() int { + // Initialize column reader + if pp.dumpers[0].reader == nil { + pp.ReadRow() + } + + // All the columns share the same data page size, + // so we only need to read one column chunk. + dumper := pp.dumpers[0] + for true { + read := dumper.readNextBatch(defaultBatchSize) + if read == 0 { + break + } + } + + alloc, ok := pp.alloc.(*sampleAllocator) + if !ok { + return 0 + } + bufSizes := alloc.allocated + + // We have collected all the allocation for one column chunk. + // The allocation order are: + // read buffer, decompressed dict buffer, compressed buffer, decompressed data page buffer, compressed data page buffer... + // and compressed buffer is released after decompression. 
+ // So we estimate the memory usage as: + // (roundup(decompressed dict buffer) + roundup(decompressed data page buffer) + roundup(read buffer) + roundup(parquet read buffer)) * num_cols + + dictUsage := 0 + dataPageUsage := 0 + readBufferUsage := roundup(bufSizes[0]) + roundup(defaultBufSize) + if len(bufSizes) == 3 { + dataPageUsage = roundup(bufSizes[1]) + } else { + dictUsage = roundup(bufSizes[1]) + for i := 3; i < len(bufSizes); i += 2 { + dataPageUsage = max(bufSizes[i], dataPageUsage) + } + dataPageUsage = roundup(dataPageUsage) + } + return roundup(dataPageUsage+dictUsage+readBufferUsage) * len(pp.columnNames) } func (pp *ParquetParser) setStringData(readNum, col, offset int) { @@ -553,6 +587,15 @@ func (pp *ParquetParser) Init() error { return nil } +// resetReader is used to reclaim the memory used by the column reader. +func (pp *ParquetParser) resetReader() { + for _, d := range pp.dumpers { + if d.reader != nil { + d.reader.Reset() + } + } +} + // ReadRows read several rows internally and store them in the row buffer. func (pp *ParquetParser) ReadRows(num int) (int, error) { readNum := min(num, pp.totalRows-pp.curRows) @@ -564,6 +607,9 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { for read < readNum { // Move to next row group if pp.curRowInGroup == pp.totalRowsInGroup { + if pp.curRowGroup >= 0 { + pp.resetReader() + } pp.curRowGroup++ for c := 0; c < len(pp.dumpers); c++ { rowGroupReader := pp.readers[c].RowGroup(pp.curRowGroup) @@ -689,6 +735,7 @@ func (pp *ParquetParser) ScannedPos() (int64, error) { // Close closes the parquet file of the parser. // It implements the Parser interface. 
func (pp *ParquetParser) Close() error { + pp.resetReader() for _, r := range pp.readers { if err := r.Close(); err != nil { return errors.Trace(err) @@ -837,6 +884,7 @@ func NewParquetParser( ctx context.Context, store storage.ExternalStorage, r storage.ReadSeekCloser, + allocator memory.Allocator, path string, ) (*ParquetParser, error) { wrapper, ok := r.(*parquetFileWrapper) @@ -850,7 +898,105 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } - alloc := &allocatorWithStats{baseAllocator: memory.DefaultAllocator} + prop := parquet.NewReaderProperties(allocator) + prop.BufferedStreamEnabled = true + + reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) + if err != nil { + return nil, errors.Trace(err) + } + + fileSchema := reader.MetaData().Schema + columnMetas := make([]convertedType, fileSchema.NumColumns()) + columnNames := make([]string, 0, fileSchema.NumColumns()) + + for i := range columnMetas { + desc := reader.MetaData().Schema.Column(i) + columnNames = append(columnNames, strings.ToLower(desc.Name())) + + logicalType := desc.LogicalType() + if logicalType.IsValid() { + columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() + } else { + columnMetas[i].converted = desc.ConvertedType() + pnode, _ := desc.SchemaNode().(*schema.PrimitiveNode) + columnMetas[i].decimalMeta = pnode.DecimalMetadata() + } + } + + subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) + subreaders = append(subreaders, reader) + for i := 1; i < fileSchema.NumColumns(); i++ { + newWrapper, err := wrapper.Open("") + if err != nil { + return nil, errors.Trace(err) + } + reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + if err != nil { + return nil, errors.Trace(err) + } + subreaders = append(subreaders, reader) + } + + parser := &ParquetParser{ + readers: subreaders, + colMetas: columnMetas, + columnNames: columnNames, + alloc: allocator, + logger: 
log.FromContext(ctx), + } + parser.Init() + + return parser, nil +} + +type sampleAllocator struct { + allocated []int +} + +func (sa *sampleAllocator) Allocate(size int) []byte { + sa.allocated = append(sa.allocated, size) + return make([]byte, size) +} + +func (sa *sampleAllocator) Free(buf []byte) {} + +func (sa *sampleAllocator) Reallocate(size int, buf []byte) []byte { + sa.allocated = append(sa.allocated, size) + return make([]byte, size) +} + +func roundup(n int) int { + v := uint(n) + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + return max(int(v+1), 256<<10) +} + +// NewParquetParserForSampling generates a parquet parser used in sampling. +// The only difference is that we use a special allocator to track the memory allocation. +func NewParquetParserForSampling( + ctx context.Context, + store storage.ExternalStorage, + r storage.ReadSeekCloser, + path string, +) (*ParquetParser, error) { + wrapper, ok := r.(*parquetFileWrapper) + if !ok { + wrapper = &parquetFileWrapper{ + ReadSeekCloser: r, + store: store, + ctx: ctx, + path: path, + } + wrapper.InitBuffer(defaultBufSize) + } + + alloc := &sampleAllocator{} prop := parquet.NewReaderProperties(alloc) prop.BufferedStreamEnabled = true @@ -884,7 +1030,11 @@ func NewParquetParser( if err != nil { return nil, errors.Trace(err) } + + prop := parquet.NewReaderProperties(nil) + prop.BufferedStreamEnabled = true reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + if err != nil { return nil, errors.Trace(err) } From 8692b54d3787dc22f2a857b5a67d6954172b5c34 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 31 Dec 2024 14:05:46 +0800 Subject: [PATCH 21/93] Update go mod --- go.mod | 2 +- go.sum | 8 ++++++++ lightning/pkg/importer/chunk_process.go | 8 +++----- lightning/pkg/importer/dup_detect.go | 2 +- lightning/pkg/importer/get_pre_info.go | 4 ++-- lightning/pkg/importer/table_import.go | 9 +++------ 
pkg/executor/importer/import.go | 1 - pkg/lightning/mydump/parquet_parser.go | 8 ++++++-- 8 files changed, 24 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index 7c7db5fd8d98c..34cb1bf6939dd 100644 --- a/go.mod +++ b/go.mod @@ -151,7 +151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113 +require github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index 801f397b74391..bfadda91fa69a 100644 --- a/go.sum +++ b/go.sum @@ -512,6 +512,14 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113 h1:gaUhamPZuxAcha0gbbg/iN4BO3nLK2OnyQHb1wzrtRo= github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241230101731-8134d78bce89 h1:fd4nBmlnLjWPZsCWbV6JbYGH+kEO5lRVyf6F+ZAiOLc= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241230101731-8134d78bce89/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231022458-5c5cd6bd0422 h1:56uGSEyQExsRjh4hnmOmJqDz0vb65pH72Dxsep1Abvw= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231022458-5c5cd6bd0422/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231031812-53833096a527 h1:SPTAgulop3tscwhH83DRM3eA3BnlnJUuMZaS7UtE5a4= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231031812-53833096a527/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a h1:TRWmL6aSpSj8MPZLc4cHpGxQUivvhAOEqxKos8cUVIU= +github.com/joechenrh/arrow-go/v18 
v18.0.0-20241231055212-8bc1d3aa614a/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index a0ab3ddb6205c..533cb8ab8cd09 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -20,7 +20,6 @@ import ( "io" "time" - pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/membuf" @@ -67,9 +66,8 @@ func newChunkProcessor( ioWorkers *worker.Pool, store storage.ExternalStorage, tableInfo *model.TableInfo, - allocator pmemory.Allocator, ) (*chunkProcessor, error) { - parser, err := openParser(ctx, cfg, chunk, ioWorkers, store, tableInfo, allocator) + parser, err := openParser(ctx, cfg, chunk, ioWorkers, store, tableInfo) if err != nil { return nil, err } @@ -87,7 +85,7 @@ func openParser( ioWorkers *worker.Pool, store storage.ExternalStorage, tblInfo *model.TableInfo, - allocator pmemory.Allocator, + ) (mydump.Parser, error) { blockBufSize := int64(cfg.Mydumper.ReadBlockSize) reader, err := mydump.OpenReader(ctx, &chunk.FileMeta, store, storage.DecompressConfig{ @@ -113,7 +111,7 @@ func openParser( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, cfg.TiDB.SQLMode, reader, blockBufSize, ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, store, reader, allocator, chunk.FileMeta.Path) + parser, err = mydump.NewParquetParser(ctx, store, reader, chunk.FileMeta.Path) if err != nil { return nil, err } diff --git a/lightning/pkg/importer/dup_detect.go 
b/lightning/pkg/importer/dup_detect.go index a1115fb36c7e3..e5167246a93d9 100644 --- a/lightning/pkg/importer/dup_detect.go +++ b/lightning/pkg/importer/dup_detect.go @@ -203,7 +203,7 @@ func (d *dupDetector) addKeysByChunk( adder *duplicate.KeyAdder, chunk *checkpoints.ChunkCheckpoint, ) error { - parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core, nil) + parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core) if err != nil { return err } diff --git a/lightning/pkg/importer/get_pre_info.go b/lightning/pkg/importer/get_pre_info.go index a1565389fd058..77c6d6558c8e4 100644 --- a/lightning/pkg/importer/get_pre_info.go +++ b/lightning/pkg/importer/get_pre_info.go @@ -489,7 +489,7 @@ func (p *PreImportInfoGetterImpl) ReadFirstNRowsByFileMeta(ctx context.Context, case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, nil, dataFileMeta.Path) + parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, dataFileMeta.Path) if err != nil { return nil, nil, errors.Trace(err) } @@ -659,7 +659,7 @@ func (p *PreImportInfoGetterImpl) sampleDataFromTable( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, nil, sampleFile.Path) + parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, sampleFile.Path) if err != nil { return 0.0, false, errors.Trace(err) } diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 41b52d4c262d9..ccb544e5a9484 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -65,7 +65,6 @@ import ( var memLimiter *membuf.Limiter var memoryForAllocator int 
-var memoryAllocator pmemory.Allocator func init() { memTotal, err := memory.MemTotal() @@ -74,11 +73,9 @@ func init() { memTotal = math.MaxInt32 } // TODO(joechenrh): set a more proper waterline - memoryForAllocator = int(memTotal / 2) + memoryForAllocator = int(memTotal / 5 * 4) memLimiter = membuf.NewLimiter(memoryForAllocator) - allocator := &pmemory.BuddyAllocator{} - allocator.Init(memoryForAllocator) - memoryAllocator = allocator + pmemory.SetMaxMemoryUsage(memoryForAllocator) } // TableImporter is a helper struct to import a table. @@ -798,7 +795,7 @@ ChunkLoop: setError(err) break } - cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core, memoryAllocator) + cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { setError(err) break diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index f30eb02affb14..6ef74ba91f0a3 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1229,7 +1229,6 @@ func (e *LoadDataController) GetParser( ctx, e.dataStore, reader, - nil, dataFileInfo.Remote.Path, ) } diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 23f618ee8d869..dbbdbe7e1723a 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -394,7 +394,7 @@ func (pp *ParquetParser) GetMemoryUage() int { } dataPageUsage = roundup(dataPageUsage) } - return roundup(dataPageUsage+dictUsage+readBufferUsage) * len(pp.columnNames) + return (dataPageUsage + dictUsage + readBufferUsage) * len(pp.columnNames) } func (pp *ParquetParser) setStringData(readNum, col, offset int) { @@ -741,6 +741,9 @@ func (pp *ParquetParser) Close() error { return errors.Trace(err) } } + if buddy, ok := pp.alloc.(*memory.BuddyAllocator); ok { + buddy.Close() + } return nil } @@ -884,7 +887,6 @@ func NewParquetParser( ctx context.Context, store 
storage.ExternalStorage, r storage.ReadSeekCloser, - allocator memory.Allocator, path string, ) (*ParquetParser, error) { wrapper, ok := r.(*parquetFileWrapper) @@ -898,6 +900,8 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } + allocator := &memory.BuddyAllocator{} + allocator.Init(2 << 30) prop := parquet.NewReaderProperties(allocator) prop.BufferedStreamEnabled = true From 97d72f14c6f9edf4845511a347b4f106a9fc8498 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 31 Dec 2024 14:07:34 +0800 Subject: [PATCH 22/93] Fix --- lightning/pkg/importer/chunk_process_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightning/pkg/importer/chunk_process_test.go b/lightning/pkg/importer/chunk_process_test.go index 7d17154caaa9a..32afc724e95ea 100644 --- a/lightning/pkg/importer/chunk_process_test.go +++ b/lightning/pkg/importer/chunk_process_test.go @@ -86,7 +86,7 @@ func (s *chunkRestoreSuite) SetupTest() { } var err error - s.cr, err = newChunkProcessor(context.Background(), 1, s.cfg, &chunk, w, s.store, nil, nil) + s.cr, err = newChunkProcessor(context.Background(), 1, s.cfg, &chunk, w, s.store, nil) require.NoError(s.T(), err) } @@ -768,7 +768,7 @@ func TestCompressChunkRestore(t *testing.T) { cfg.App.TableConcurrency = 2 cfg.Mydumper.CSV.Header = false - cr, err := newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil, nil) + cr, err := newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil) require.NoError(t, err) var ( id, lastID int @@ -800,7 +800,7 @@ func TestCompressChunkRestore(t *testing.T) { RowIDMax: 100, }, } - cr, err = newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil, nil) + cr, err = newChunkProcessor(ctx, 1, cfg, &chunk, w, store, nil) require.NoError(t, err) for id = lastID; id < 300; { err = cr.parser.ReadRow() From 0a4090e4ac27415047ae0895c01a39175ec58121 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 2 Jan 2025 10:15:12 +0800 Subject: [PATCH 23/93] Add new configuration --- go.mod | 2 +- go.sum | 
6 +++++ lightning/cmd/tidb-lightning/main.go | 6 +++-- lightning/pkg/importer/import.go | 7 ++++++ lightning/pkg/importer/table_import.go | 31 ++++++++++++++++++++------ pkg/lightning/config/config.go | 16 ++++++++++++- pkg/lightning/mydump/parquet_parser.go | 2 +- 7 files changed, 58 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 34cb1bf6939dd..6e6e5a149ff02 100644 --- a/go.mod +++ b/go.mod @@ -151,7 +151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a +require github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index bfadda91fa69a..10fd57dd8f2a9 100644 --- a/go.sum +++ b/go.sum @@ -520,6 +520,12 @@ github.com/joechenrh/arrow-go/v18 v18.0.0-20241231031812-53833096a527 h1:SPTAgul github.com/joechenrh/arrow-go/v18 v18.0.0-20241231031812-53833096a527/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a h1:TRWmL6aSpSj8MPZLc4cHpGxQUivvhAOEqxKos8cUVIU= github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231081713-7bace6c313a2 h1:VlNIeUAhBh6+3N+Cs9Jn4t77dnp/o3AZvVyPJVC/u0A= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231081713-7bace6c313a2/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231083420-6131e1087c61 h1:WbgGeWZpukO65agrWVlAzvu4PDk4fyEEkeRi5Reh/XQ= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231083420-6131e1087c61/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e h1:7iY3f14jAyh+dwSy92IPyxoYg2BWn46O2uPOR3fpO5k= +github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e/go.mod 
h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/cmd/tidb-lightning/main.go b/lightning/cmd/tidb-lightning/main.go index 3166a2280489d..17535171e853d 100644 --- a/lightning/cmd/tidb-lightning/main.go +++ b/lightning/cmd/tidb-lightning/main.go @@ -42,7 +42,7 @@ func bToMb(b uint64) uint64 { } func TrackSysMemUsage(ctx context.Context) { - tick := time.NewTicker(3 * time.Second) + tick := time.NewTicker(time.Second) for { select { case <-ctx.Done(): @@ -50,7 +50,9 @@ func TrackSysMemUsage(ctx context.Context) { case <-tick.C: var m runtime.MemStats runtime.ReadMemStats(&m) - fmt.Printf("HeapInUse = %v MiB, HeapAlloc = %v MiB\n", bToMb(m.HeapInuse), bToMb(m.HeapAlloc)) + + fmt.Printf("HeapInUse = %v MiB, limit = %d MiB, canReturn = %dMiB\n", + bToMb(m.HeapInuse), bToMb(m.Sys-m.HeapReleased), bToMb(m.HeapIdle-m.HeapReleased)) } } } diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index 498ee32cca634..001e32c1371e7 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -28,6 +28,7 @@ import ( "github.com/coreos/go-semver/semver" "github.com/docker/go-units" "github.com/google/uuid" + pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" @@ -543,6 +544,8 @@ func (rc *Controller) Close() { func (rc *Controller) Run(ctx context.Context) error { failpoint.Inject("beforeRun", func() {}) + setMemoryLimitForParquet(rc.cfg.App.MaxMemoryUsage) + opts := []func(context.Context) error{ rc.setGlobalVariables, rc.restoreSchema, @@ -1538,6 +1541,10 @@ func 
(rc *Controller) importTables(ctx context.Context) (finalErr error) { default: } + // All tables are read, we can free memory used for parquet. + logTask.Info("Read table done, free memory and call GC") + pmemory.FreeMemory() + postProgress = func() error { close(postProcessTaskChan) // otherwise, we should run all tasks in the post-process task chan diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index ccb544e5a9484..120177c0ce663 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -63,19 +63,18 @@ import ( "google.golang.org/grpc/status" ) -var memLimiter *membuf.Limiter -var memoryForAllocator int +var memLimit int // memory limit for parquet reader +var memLimiter *membuf.Limiter // memory limiter for parquet reader -func init() { +func setMemoryLimitForParquet(percent int) { memTotal, err := memory.MemTotal() if err != nil { // Set limit to int max, which means no limiter memTotal = math.MaxInt32 } - // TODO(joechenrh): set a more proper waterline - memoryForAllocator = int(memTotal / 5 * 4) - memLimiter = membuf.NewLimiter(memoryForAllocator) - pmemory.SetMaxMemoryUsage(memoryForAllocator) + memLimit = int(memTotal) * min(percent, 100) / 100 + memLimiter = membuf.NewLimiter(memLimit) + pmemory.SetMaxMemoryUsage(memLimit) } // TableImporter is a helper struct to import a table. @@ -804,6 +803,24 @@ ChunkLoop: // Limit the concurrency of parquet reader using estimated memory usage. if chunk.FileMeta.Type == mydump.SourceTypeParquet { memoryUsage := tr.tableMeta.DataFiles[0].FileMeta.MemoryUsage + + // If memory usage is larger than memory limit, set memory usage + // to limit to block other file import. 
+ if memoryUsage > memLimit { + tr.logger.Warn("Memory usage larger than limit", + zap.String("file", chunk.FileMeta.Path), + zap.String("memory usage", fmt.Sprintf("%d MB", 4990697472>>20)), + zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), + ) + memoryUsage = memLimit + } else { + tr.logger.Info("Get memory limit", + zap.String("file", chunk.FileMeta.Path), + zap.String("memory usage", fmt.Sprintf("%d MB", 4990697472>>20)), + zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), + ) + } + memLimiter.Acquire(memoryUsage) cr.memLimiter = memLimiter cr.memoryUsage = memoryUsage diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 9d00945ac68d5..b2b862fda8c70 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -68,6 +68,11 @@ const ( KVWriteBatchSize = 16 * units.KiB DefaultRangeConcurrency = 16 + // For TiDB mode, inserting record to table may consume many memory, + // so we set a lower memory limit. + defaultMemoryUsageTiDB = 40 + defaultMemoryUsageLocal = 80 + defaultDistSQLScanConcurrency = 15 defaultBuildStatsConcurrency = 20 defaultIndexSerialScanConcurrency = 20 @@ -322,6 +327,9 @@ type Lightning struct { CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"` + // max memory used for memory arena used for parquet file + MaxMemoryUsage int `toml:"max-memory-usage" json:"max-memory-usage"` + MaxError MaxError `toml:"max-error" json:"max-error"` // deprecated, use Conflict.MaxRecordRows instead MaxErrorRecords int64 `toml:"max-error-records" json:"max-error-records"` @@ -339,6 +347,9 @@ func (l *Lightning) adjust(i *TikvImporter) { if l.IndexConcurrency == 0 { l.IndexConcurrency = l.RegionConcurrency } + if l.MaxMemoryUsage == 0 { + l.MaxMemoryUsage = defaultMemoryUsageTiDB + } case BackendLocal: if l.IndexConcurrency == 0 { l.IndexConcurrency = defaultIndexConcurrency @@ -346,7 
+357,9 @@ func (l *Lightning) adjust(i *TikvImporter) { if l.TableConcurrency == 0 { l.TableConcurrency = DefaultTableConcurrency } - + if l.MaxMemoryUsage == 0 { + l.MaxMemoryUsage = defaultMemoryUsageLocal + } if len(l.MetaSchemaName) == 0 { l.MetaSchemaName = defaultMetaSchemaName } @@ -1442,6 +1455,7 @@ func NewConfig() *Config { RegionConcurrency: runtime.NumCPU(), TableConcurrency: 0, IndexConcurrency: 0, + MaxMemoryUsage: 0, IOConcurrency: 5, CheckRequirements: true, TaskInfoSchemaName: defaultTaskInfoSchemaName, diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index dbbdbe7e1723a..41ff013142068 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -400,7 +400,7 @@ func (pp *ParquetParser) GetMemoryUage() int { func (pp *ParquetParser) setStringData(readNum, col, offset int) { buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetString(buf[i].String(), "utf8mb4_bin") + pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") } } From 3d00736f57b759a3dbc7084afcf87bc06c4c5ffc Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 2 Jan 2025 14:55:26 +0800 Subject: [PATCH 24/93] Adjust gc percent --- DEPS.bzl | 12 +- lightning/pkg/importer/import.go | 3 +- lightning/pkg/importer/table_import.go | 13 +- pkg/lightning/mydump/BUILD.bazel | 2 + pkg/lightning/mydump/buddy_allocator.go | 622 ++++++++++++++++++++++++ pkg/lightning/mydump/parquet_parser.go | 6 +- 6 files changed, 640 insertions(+), 18 deletions(-) create mode 100644 pkg/lightning/mydump/buddy_allocator.go diff --git a/DEPS.bzl b/DEPS.bzl index fa3a22090df89..0d7aec3cc3120 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -4121,13 +4121,13 @@ def go_deps(): name = "com_github_joechenrh_arrow_go_v18", build_file_proto_mode = "disable_global", importpath = "github.com/joechenrh/arrow-go/v18", - sha256 = 
"6569ded6b245e1f44cbd3c2e75187040372dfba9111b7f1c9d4e301b86d3316d", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20241216023057-f9949aab8c2d", + sha256 = "b326b7a449c131368eb5b9b5c5629ed827c2aa22579abfc786cc7ea2d1c42229", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20241231101551-0f1e67b25b9e", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241216023057-f9949aab8c2d.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", ], ) go_repository( diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index 001e32c1371e7..56a236d76a4a9 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -28,7 +28,6 @@ import ( "github.com/coreos/go-semver/semver" "github.com/docker/go-units" "github.com/google/uuid" - pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/pingcap/errors" 
"github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" @@ -1543,7 +1542,7 @@ func (rc *Controller) importTables(ctx context.Context) (finalErr error) { // All tables are read, we can free memory used for parquet. logTask.Info("Read table done, free memory and call GC") - pmemory.FreeMemory() + mydump.FreeMemory() postProgress = func() error { close(postProcessTaskChan) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 120177c0ce663..f5c6919bc4d7c 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -28,7 +28,6 @@ import ( "time" dmysql "github.com/go-sql-driver/mysql" - pmemory "github.com/joechenrh/arrow-go/v18/arrow/memory" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/membuf" @@ -74,7 +73,7 @@ func setMemoryLimitForParquet(percent int) { } memLimit = int(memTotal) * min(percent, 100) / 100 memLimiter = membuf.NewLimiter(memLimit) - pmemory.SetMaxMemoryUsage(memLimit) + mydump.SetMaxMemoryUsage(memLimit) } // TableImporter is a helper struct to import a table. @@ -803,27 +802,27 @@ ChunkLoop: // Limit the concurrency of parquet reader using estimated memory usage. if chunk.FileMeta.Type == mydump.SourceTypeParquet { memoryUsage := tr.tableMeta.DataFiles[0].FileMeta.MemoryUsage + memLimiter.Acquire(memoryUsage) + cr.memLimiter = memLimiter + cr.memoryUsage = memoryUsage // If memory usage is larger than memory limit, set memory usage // to limit to block other file import. 
if memoryUsage > memLimit { tr.logger.Warn("Memory usage larger than limit", zap.String("file", chunk.FileMeta.Path), - zap.String("memory usage", fmt.Sprintf("%d MB", 4990697472>>20)), + zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), ) memoryUsage = memLimit } else { tr.logger.Info("Get memory limit", zap.String("file", chunk.FileMeta.Path), - zap.String("memory usage", fmt.Sprintf("%d MB", 4990697472>>20)), + zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), ) } - memLimiter.Acquire(memoryUsage) - cr.memLimiter = memLimiter - cr.memoryUsage = memoryUsage } restoreWorker := rc.regionWorkers.Apply() diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 88fac57fe9c3c..5d1e84d2fa6f0 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "mydump", srcs = [ + "buddy_allocator.go", "bytes.go", "charset_convertor.go", "csv_parser.go", @@ -33,6 +34,7 @@ go_library( "//pkg/types", "//pkg/util", "//pkg/util/filter", + "//pkg/util/memory", "//pkg/util/regexpr-router", "//pkg/util/set", "//pkg/util/slice", diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go new file mode 100644 index 0000000000000..cc41920f27ca8 --- /dev/null +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -0,0 +1,622 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mydump + +import ( + "context" + "fmt" + "os" + "runtime" + "runtime/debug" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/util/memory" + "go.uber.org/zap" +) + +var ( + maxArenaCount = 0 // maximum arena count + arenaDefaultSize = 1 << 30 // size of each arena + leafSize = 256 << 10 // The smallest block size is 256KB +) + +// SetMaxMemoryUsage set the memory used by parquet reader. +func SetMaxMemoryUsage(size int) { + maxArenaCount = size / arenaDefaultSize +} + +// arenaPool is used to cache and reuse arenas +type arenaPool struct { + arenas chan *internalAllocator + allocated int + lock sync.Mutex +} + +func (ap *arenaPool) get() *internalAllocator { + // First try to get cached arena + select { + case a := <-ap.arenas: + return a + default: + } + + ap.lock.Lock() + defer ap.lock.Unlock() + + // Create a new one and return + if ap.allocated < maxArenaCount { + ap.allocated++ + bd := &internalAllocator{} + bd.init(arenaDefaultSize) + ap.adjustGCPercent() + return bd + } + + // We can't create new arena, return nil + return nil +} + +func (ap *arenaPool) put(a *internalAllocator) { + ap.lock.Lock() + defer ap.lock.Unlock() + + // discard it if necessary + if ap.allocated > maxArenaCount { + a.bufInfo = nil + a.buffer = nil + ap.allocated-- + ap.adjustGCPercent() + return + } + + ap.arenas <- a +} + +func (ap *arenaPool) free() { + ap.lock.Lock() + defer ap.lock.Unlock() + + ap.allocated = 0 + for len(ap.arenas) > 0 { + a := <-ap.arenas + a.bufInfo = nil + 
a.buffer = nil + } + ap.adjustGCPercent() +} + +func (ap *arenaPool) adjustGCPercent() { + gogc := os.Getenv("GOGC") + memTotal, err := memory.MemTotal() + if gogc == "" && err == nil { + percent := int(memTotal) * 100 / max(ap.allocated*arenaDefaultSize, 1) + percent = min(percent, 100) / 10 * 10 + old := debug.SetGCPercent(percent) + runtime.GC() + log.L().Debug("set gc percentage", zap.Int("old", old), zap.Int("new", percent)) + } +} + +var pool = &arenaPool{ + allocated: 0, + arenas: make(chan *internalAllocator, 256), +} + +// FreeMemory free all the memory allocated for arenas. +// TODO(joechenrh): check if there are anyone using the arenas. +func FreeMemory() { + pool.free() +} + +// Convert slice to an uintptr. This value is used as key in map. +func unsafeGetblkAddr(slice []byte) uintptr { + return uintptr(unsafe.Pointer(&slice[0])) +} + +func roundUp(n, sz int) int { + return (n + sz - 1) / sz * sz +} + +// Compute block size at layer l +func blkSize(l int) int { + return (1 << l) * leafSize +} + +// Compute the block index for offset at layer l +func blkIndex(l, offset int) int { + return offset / blkSize(l) +} + +// Compute the first block index at layer l after offset +func blkIndexNext(l, offset int) int { + blkSize := blkSize(l) + bi := offset / blkSize + if offset%blkSize != 0 { + bi++ + } + return bi +} + +// Convert a block index at layer l back into an offset +func blkAddr(l, bi int) int { + return bi * blkSize(l) +} + +// Return 1 if bit at position index in array is set to 1 +func bitIsSet(arr []byte, index int) bool { + b := int(arr[index/8]) + m := (1 << (index % 8)) + return (b & m) == m +} + +// Set bit at position index in array to 1 +func bitSet(arr []byte, index int) { + b := int(arr[index/8]) + m := (1 << (index % 8)) + arr[index/8] = byte(b | m) +} + +// Clear bit at position index in array +func bitClear(arr []byte, index int) { + b := int(arr[index/8]) + m := (1 << (index % 8)) + arr[index/8] = byte(b & ^m) +} + +// Return the first 
layer whose block size is larger than n +func firstLayer(n int) int { + l := 0 + for size := leafSize; size < n; size *= 2 { + l++ + } + return l +} + +// The allocator has bufferInfo for each size k. Each bufferInfo has a free +// list, an array alloc to keep track which blocks have been +// allocated, and an split array to to keep track which blocks have +// been split. The arrays are of type char (which is 1 byte), but the +// allocator uses 1 bit per block (thus, one char records the info of +// 8 blocks). +type bufferInfo struct { + alloc []byte + split []byte + canAllocate []byte + + l int + nblk int + freeCnt int +} + +func (binfo *bufferInfo) init(nblk, l int) { + sz := roundUp(nblk, 8) / 8 + binfo.canAllocate = make([]byte, nblk) + binfo.alloc = make([]byte, sz) + binfo.split = make([]byte, sz) + binfo.l = l +} + +// Remove buffer at offset in this layer as non-allocatable. +func (binfo *bufferInfo) remove(offset int) { + binfo.freeCnt-- + bitClear(binfo.canAllocate, blkIndex(binfo.l, offset)) +} + +// Check whether there are available buffer in this layer. +func (binfo *bufferInfo) empty() bool { + return binfo.freeCnt == 0 +} + +// Add buffer at offset in this layer as allocatable +func (binfo *bufferInfo) push(offset int) { + binfo.freeCnt++ + bitSet(binfo.canAllocate, blkIndex(binfo.l, offset)) +} + +// Get one free buffer in this layer +func (binfo *bufferInfo) pop() int { + for bi := 0; bi < binfo.nblk; bi++ { + if bitIsSet(binfo.canAllocate, bi) { + bitClear(binfo.canAllocate, bi) + binfo.freeCnt-- + return blkAddr(binfo.l, bi) + } + } + return -1 +} + +// buffer is represented as an offset. 
+type internalAllocator struct { + buffer []byte + bufInfo []bufferInfo + nLayers int + maxLayer int + + allocated map[uintptr]int + + allocatedBytes atomic.Int64 + unavailable int + total int +} + +// Find the layer of the block at offset +func (b *internalAllocator) layer(offset int) int { + for k := 0; k < b.maxLayer; k++ { + if bitIsSet(b.bufInfo[k+1].split, blkIndex(k+1, offset)) { + return k + } + } + return b.maxLayer +} + +// Allocate nbytes, but malloc won't return anything smaller than LeafSize +func (b *internalAllocator) allocateInternal(nbytes int) []byte { + // Find a free block >= nbytes, starting with lowest layer possible + fl := firstLayer(nbytes) + l := fl + for ; l < b.nLayers; l++ { + if !b.bufInfo[l].empty() { + break + } + } + + // No free blocks, allocation failed + if l == b.nLayers { + return nil + } + + // Found a block, pop it and potentially split it. + offset := b.bufInfo[l].pop() + bitSet(b.bufInfo[l].alloc, blkIndex(l, offset)) + for ; l > fl; l-- { + // Get the buddy buffer + qa := offset + blkSize(l-1) + // Split the block at layer l, mark it as splited. + // Mark half of the block at l - 1 as allocated, + // and put it into the free list at layer l-1. 
+ bitSet(b.bufInfo[l].split, blkIndex(l, offset)) + bitSet(b.bufInfo[l-1].alloc, blkIndex(l-1, offset)) + b.bufInfo[l-1].push(qa) + } + + buf := b.buffer[offset : offset+nbytes] + b.allocatedBytes.Add(int64(blkSize(l))) + b.allocated[unsafeGetblkAddr(buf)] = offset + + return buf +} + +// free memory marked by p, which was earlier allocated using Malloc +func (b *internalAllocator) freeInternal(bs []byte) { + bs = bs[:1] + addr := unsafeGetblkAddr(bs) + offset, ok := b.allocated[addr] + if !ok { + return + } + + l := b.layer(offset) + + b.allocatedBytes.Add(-int64(blkSize(l))) + delete(b.allocated, addr) + + // Start merge from layer l + for ; l < b.maxLayer; l++ { + // Find the buddy index at layer l + bi := blkIndex(l, offset) + buddy := bi + 1 + if bi%2 != 0 { + buddy = bi - 1 + } + + // Free p at layer l + bitClear(b.bufInfo[l].alloc, bi) + + // If buddy is allocated, break the merge + if bitIsSet(b.bufInfo[l].alloc, buddy) { + break + } + + // Buddy is free, merge with buddy and remove it from free list + buddyOffset := blkAddr(l, buddy) + b.bufInfo[l].remove(buddyOffset) + + // Update offset to the merged buffer at layer l+1 + if buddy%2 == 0 { + offset = buddyOffset + } + + // At layer l+1, mark that the merged buddy pair isn't split anymore + bitClear(b.bufInfo[l+1].split, blkIndex(l+1, offset)) + } + + // Add the final merged buffer to free list. + b.bufInfo[l].push(offset) + + b.sanityCheck() +} + +func (b *internalAllocator) freeAll() { + for _, offset := range b.allocated { + b.freeInternal(b.buffer[offset:]) + } + + if len(b.allocated) != 0 || b.allocatedBytes.Load() != 0 { + panic("freeAll error") + } +} + +/* + * Mark memory from [start, end), starting at layer 0, as allocated. 
+ * + * start(leftbi) end rightBi + * | | | + * |--------|---------|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|--------|--------| + */ +func (b *internalAllocator) markAllocated(start, end int) { + for k := 0; k < b.nLayers; k++ { + leftBi := blkIndex(k, start) + rightBi := blkIndexNext(k, end) + for bi := leftBi; bi < rightBi; bi++ { + // if a block is allocated at size k, mark it as split too. + bitSet(b.bufInfo[k].split, bi) + bitSet(b.bufInfo[k].alloc, bi) + } + } +} + +// Mark the range outside [start, end) as allocated +func (b *internalAllocator) markUnavailable(start, end int) int { + heapSize := blkSize(b.maxLayer) + unavailableEnd := roundUp(heapSize-end, leafSize) + unavailableStart := roundUp(start, leafSize) + b.markAllocated(0, unavailableStart) + b.markAllocated(heapSize-unavailableEnd, heapSize) + return unavailableEnd + unavailableStart +} + +// If a block is marked as allocated and its buddy is free, put the +// buddy on the free list at layer l. +func (b *internalAllocator) initFreePair(l, bi int) (free int) { + buddy := bi + 1 + if bi%2 == 1 { + buddy = bi - 1 + } + + // one of the pair is free + if bitIsSet(b.bufInfo[l].alloc, bi) != bitIsSet(b.bufInfo[l].alloc, buddy) { + free = blkSize(l) + if bitIsSet(b.bufInfo[l].alloc, bi) { + b.bufInfo[l].push(blkAddr(l, buddy)) + } else { + b.bufInfo[l].push(blkAddr(l, bi)) + } + + } + return +} + +/* + * Initialize the free lists for each layer l. For each layer l, there + * are only two pairs that may have a buddy that should be on free list. 
+ * + * start leftBi rightBi end + * | | | | + * |xxxxxxxx|xxxxxxxx|x-------|--------|--------|------xx|xxxxxxxx|xxxxxxxx| + */ +func (b *internalAllocator) initFree(left, right int) int { + free := 0 + + for l := 0; l < b.maxLayer; l++ { + nblk := 1 << (b.maxLayer - l) + leftBi := blkIndexNext(l, left) + rightBi := blkIndex(l, right) + + if leftBi < nblk { + free += b.initFreePair(l, leftBi) + } + if rightBi > leftBi && (leftBi/2 != rightBi/2) && rightBi < nblk { + free += b.initFreePair(l, rightBi) + } + } + + return free +} + +// Initialize the buddy allocator, assert totalSize is the power of 2. +func (b *internalAllocator) init(totalSize int) { + log2 := func(n int) int { + k := 0 + for n > 1 { + k++ + n = n >> 1 + } + return k + } + + // compute the number of sizes we need to manage totalSize + b.buffer = make([]byte, totalSize) + b.nLayers = log2(totalSize/leafSize) + 1 + if totalSize > blkSize(b.nLayers-1) { + b.nLayers++ // round up to the next power of 2 + } + b.maxLayer = b.nLayers - 1 + b.bufInfo = make([]bufferInfo, b.nLayers) + + // Initialize free list and allocate the alloc array for each size l. + // Also allocate the split array for each size l, l = 0 is not used. + // since we will not split blocks of size l = 0, the smallest size. 
+ markedCount := 0 + for l := 0; l < b.nLayers; l++ { + nblk := 1 << (b.maxLayer - l) + sz := roundUp(nblk, 8) / 8 + b.bufInfo[l].canAllocate = b.buffer[markedCount : markedCount+sz] + markedCount += sz + b.bufInfo[l].alloc = b.buffer[markedCount : markedCount+sz] + markedCount += sz + b.bufInfo[l].split = b.buffer[markedCount : markedCount+sz] + markedCount += sz + b.bufInfo[l].l = l + b.bufInfo[l].nblk = nblk + } + + // Mark the memory in range [0, markedCount) and [totalSize, HeapSize) as allocated, + // where HeapSize = blkSize(maxLayer) + unavailable := b.markUnavailable(markedCount, totalSize) + // initialize free lists for each size k + free := b.initFree(0, blkSize(b.maxLayer)-unavailable) + b.unavailable = unavailable + b.total = blkSize(b.maxLayer) + + // check if the amount that is free is what we expect + if free != blkSize(b.maxLayer)-unavailable { + panic("Initialize allocator failed") + } + + b.allocated = make(map[uintptr]int, totalSize/leafSize) +} + +func (b *internalAllocator) sanityCheck() { + free := 0 + for _, binfo := range b.bufInfo { + blkSize := blkSize(binfo.l) + for bi := 0; bi < binfo.nblk; bi++ { + if bitIsSet(binfo.canAllocate, bi) { + free += blkSize + } + } + } + + alloc := 0 + for _, offset := range b.allocated { + alloc += blkSize(b.layer(offset)) + } + if alloc != int(b.allocatedBytes.Load()) { + panic("Sanity check failed") + } + + if free+int(b.allocatedBytes.Load())+b.unavailable != b.total { + panic("Sanity check failed") + } +} + +type buddyAllocator struct { + arenas []*internalAllocator + allocated map[uintptr]int + lock sync.Mutex + + allocatedOutside atomic.Int64 + allocatedOutsideNum atomic.Int64 + + ctx context.Context + cancel context.CancelFunc +} + +func (b *buddyAllocator) Init(_ int) { + b.allocated = make(map[uintptr]int, maxArenaCount) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + tick := time.NewTicker(2 * time.Second) + defer tick.Stop() + for { + select { + case <-tick.C: + var m 
runtime.MemStats + runtime.ReadMemStats(&m) + + fmt.Printf("[buddyAllocator] Inside the allocator: %d MiB(%d blocks), outside the allocator: %d MiB(%d blocks)\n", + int(b.Allocated())/1024/1024, len(b.allocated), + int(b.allocatedOutsideNum.Load()), int(b.allocatedOutside.Load())/1024/1024, + ) + case <-ctx.Done(): + return + } + } + }() + b.ctx = ctx + b.cancel = cancel +} + +func (b *buddyAllocator) Allocate(size int) []byte { + b.lock.Lock() + defer b.lock.Unlock() + + for i, arena := range b.arenas { + buf := arena.allocateInternal(size) + if buf != nil { + b.allocated[unsafeGetblkAddr(buf)] = i + return buf + } + } + + if arena := pool.get(); arena != nil { + b.arenas = append(b.arenas, arena) + buf := arena.allocateInternal(size) + b.allocated[unsafeGetblkAddr(buf)] = len(b.arenas) - 1 + return buf + } + + b.allocatedOutside.Add(int64(size)) + b.allocatedOutsideNum.Add(1) + return make([]byte, size) +} + +func (b *buddyAllocator) Free(bs []byte) { + b.lock.Lock() + defer b.lock.Unlock() + + if bs == nil || cap(bs) == 0 { + return + } + bs = bs[:1] + addr := unsafeGetblkAddr(bs) + arenaID, ok := b.allocated[addr] + if !ok { + return + } + + b.arenas[arenaID].freeInternal(bs) + delete(b.allocated, addr) +} + +func (b *buddyAllocator) Reallocate(size int, bs []byte) []byte { + b.Free(bs) + return b.Allocate(size) +} + +func (b *buddyAllocator) Allocated() int64 { + b.lock.Lock() + defer b.lock.Unlock() + + allocatedBytes := 0 + for _, arena := range b.arenas { + allocatedBytes += int(arena.allocatedBytes.Load()) + } + return int64(allocatedBytes) +} + +// Close return the allocated memory to the pool +func (b *buddyAllocator) Close() { + b.cancel() + for _, arena := range b.arenas { + arena.freeAll() + pool.put(arena) + } +} diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 41ff013142068..4ee9bc99af762 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -741,7 +741,7 
@@ func (pp *ParquetParser) Close() error { return errors.Trace(err) } } - if buddy, ok := pp.alloc.(*memory.BuddyAllocator); ok { + if buddy, ok := pp.alloc.(*buddyAllocator); ok { buddy.Close() } return nil @@ -900,8 +900,8 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } - allocator := &memory.BuddyAllocator{} - allocator.Init(2 << 30) + allocator := &buddyAllocator{} + allocator.Init(0) prop := parquet.NewReaderProperties(allocator) prop.BufferedStreamEnabled = true From a078229844052e80655f98b9f20398ca8d4825a5 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 11:19:54 +0800 Subject: [PATCH 25/93] Fix memory controller --- lightning/pkg/importer/table_import.go | 3 +++ pkg/lightning/mydump/buddy_allocator.go | 8 ++++-- pkg/lightning/mydump/parquet_parser.go | 33 ++++++++++++++++--------- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index f5c6919bc4d7c..fe6f5d76665de 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -802,6 +802,9 @@ ChunkLoop: // Limit the concurrency of parquet reader using estimated memory usage. 
if chunk.FileMeta.Type == mydump.SourceTypeParquet { memoryUsage := tr.tableMeta.DataFiles[0].FileMeta.MemoryUsage + arenaSize := mydump.GetArenaSize() + memoryUsage = (memoryUsage + arenaSize - 1) / arenaSize * arenaSize + memLimiter.Acquire(memoryUsage) cr.memLimiter = memLimiter cr.memoryUsage = memoryUsage diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index cc41920f27ca8..97341d932b8b5 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -32,7 +32,7 @@ import ( var ( maxArenaCount = 0 // maximum arena count - arenaDefaultSize = 1 << 30 // size of each arena + arenaDefaultSize = 512 << 20 // size of each arena leafSize = 256 << 10 // The smallest block size is 256KB ) @@ -41,6 +41,10 @@ func SetMaxMemoryUsage(size int) { maxArenaCount = size / arenaDefaultSize } +func GetArenaSize() int { + return arenaDefaultSize +} + // arenaPool is used to cache and reuse arenas type arenaPool struct { arenas chan *internalAllocator @@ -105,7 +109,7 @@ func (ap *arenaPool) adjustGCPercent() { gogc := os.Getenv("GOGC") memTotal, err := memory.MemTotal() if gogc == "" && err == nil { - percent := int(memTotal) * 100 / max(ap.allocated*arenaDefaultSize, 1) + percent := int(memTotal) * 90 / max(ap.allocated*arenaDefaultSize, 1) percent = min(percent, 100) / 10 * 10 old := debug.SetGCPercent(percent) runtime.GC() diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 4ee9bc99af762..41dcb5caf9bb2 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -263,6 +263,21 @@ func (pf *parquetFileWrapper) InitBuffer(bufSize int) { pf.buf = make([]byte, bufSize) } +func (pf *parquetFileWrapper) readNBytes(p []byte) (int, error) { + read := 0 + for read < len(p) { + n, err := pf.Read(p[read:]) + read += n + if err != nil { + return read, err + } + } + if read != len(p) { + return read, errors.Errorf("Error 
reading %d bytes, only read %d bytes", len(p), read) + } + return read, nil +} + // ReadAt implemement ReaderAt interface func (pf *parquetFileWrapper) ReadAt(p []byte, off int64) (int, error) { // We want to minimize the number of Seek call as much as possible, @@ -274,22 +289,18 @@ func (pf *parquetFileWrapper) ReadAt(p []byte, off int64) (int, error) { } } else { pf.buf = pf.buf[:gap] - if _, err := pf.Read(pf.buf); err != nil { - return 0, err + if read, err := pf.readNBytes(pf.buf); err != nil { + return read, err } } - read := 0 - for read < len(p) { - n, err := pf.Read(p[read:]) - read += n - pf.lastOff = off + int64(n) - if err != nil { - return read, err - } + read, err := pf.readNBytes(p) + if err != nil { + return read, err } + pf.lastOff = off + int64(read) - return read, nil + return len(p), nil } // Seek implemement Seeker interface From 0d6d2d3373bec74caa04e34d07481efc3510b49b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 14:28:51 +0800 Subject: [PATCH 26/93] Fix GC percentage --- lightning/pkg/importer/table_import.go | 2 +- pkg/lightning/mydump/buddy_allocator.go | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index fe6f5d76665de..8c86b458496c1 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -71,7 +71,7 @@ func setMemoryLimitForParquet(percent int) { // Set limit to int max, which means no limiter memTotal = math.MaxInt32 } - memLimit = int(memTotal) * min(percent, 100) / 100 + memLimit = int(memTotal) * min(percent, 90) / 100 memLimiter = membuf.NewLimiter(memLimit) mydump.SetMaxMemoryUsage(memLimit) } diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index 97341d932b8b5..5f64352af3d12 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -109,7 +109,11 @@ func (ap *arenaPool) 
adjustGCPercent() { gogc := os.Getenv("GOGC") memTotal, err := memory.MemTotal() if gogc == "" && err == nil { - percent := int(memTotal) * 90 / max(ap.allocated*arenaDefaultSize, 1) + if ap.allocated == 0 { + debug.SetGCPercent(100) + return + } + percent := int(memTotal)*90/(ap.allocated*arenaDefaultSize) - 100 percent = min(percent, 100) / 10 * 10 old := debug.SetGCPercent(percent) runtime.GC() From 134fabae6ffeb720a1f4ba8aeb6690aed9f30db0 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 16:14:26 +0800 Subject: [PATCH 27/93] Add more log --- pkg/lightning/mydump/buddy_allocator.go | 7 +++- pkg/lightning/mydump/loader.go | 52 ++++++++++++++++--------- pkg/lightning/mydump/loader_test.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 16 +++++++- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index 5f64352af3d12..ffead69623348 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -117,7 +117,12 @@ func (ap *arenaPool) adjustGCPercent() { percent = min(percent, 100) / 10 * 10 old := debug.SetGCPercent(percent) runtime.GC() - log.L().Debug("set gc percentage", zap.Int("old", old), zap.Int("new", percent)) + log.L().Debug("set gc percentage", + zap.Int("old", old), + zap.Int("new", percent), + zap.Int("total memory", int(memTotal)), + zap.Int("allocated memory", ap.allocated*arenaDefaultSize), + ) } } diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 7a95529f10310..8aaf25f859c75 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -100,9 +100,10 @@ type SourceFileMeta struct { // If the file is compressed, RealSize is the estimated uncompressed size. // If the file is parquet, RealSize is the estimated data size after convert // to row oriented storage. 
- RealSize int64 - Rows int64 // only for parquet - MemoryUsage int // only for parquet + RealSize int64 + Rows int64 // only for parquet + MemoryUsage int // only for parquet + MemoryUsageFull int // only for parquet } // NewMDTableMeta creates an Mydumper table meta with specified character set. @@ -247,8 +248,9 @@ type mdLoaderSetup struct { tableIndexMap map[filter.Table]int setupCfg *MDLoaderSetupConfig - sampledParquetRowSizes map[string]float64 - sampledParquetMemoryUsage map[string]int + sampledParquetRowSizes map[string]float64 + sampledParquetMemoryUsage map[string]int // sampled memory usage for streaming parquet read + sampledParquetMemoryUsageFull map[string]int // sampled memory usage for non-streaming parquet read } // NewLoader constructs a MyDumper loader that scanns the data source and constructs a set of metadatas. @@ -326,8 +328,9 @@ func NewLoaderWithStore(ctx context.Context, cfg LoaderConfig, tableIndexMap: make(map[filter.Table]int), setupCfg: mdLoaderSetupCfg, - sampledParquetRowSizes: make(map[string]float64), - sampledParquetMemoryUsage: make(map[string]int), + sampledParquetRowSizes: make(map[string]float64), + sampledParquetMemoryUsage: make(map[string]int), + sampledParquetMemoryUsageFull: make(map[string]int), } if err := setup.setup(ctx); err != nil { @@ -543,8 +546,11 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size case SourceTypeParquet: tableName := info.TableName.String() if s.sampledParquetRowSizes[tableName] == 0 { - s.sampledParquetRowSizes[tableName], s.sampledParquetMemoryUsage[tableName], err = - SampleParquetRowSize(ctx, info.FileMeta, s.loader.GetStore()) + s.sampledParquetRowSizes[tableName], + s.sampledParquetMemoryUsage[tableName], + s.sampledParquetMemoryUsageFull[tableName], + err = + SampleParquetFileProperty(ctx, info.FileMeta, s.loader.GetStore()) if err != nil { logger.Error("fail to sample parquet row size", zap.String("category", "loader"), zap.String("schema", res.Schema), 
zap.String("table", res.Name), @@ -566,6 +572,7 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size m.RowsCounter.WithLabelValues(metric.StateTotalRestore, tableName).Add(float64(totalRowCount)) } info.FileMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] + info.FileMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] } s.tableDatas = append(s.tableDatas, info) } @@ -851,22 +858,31 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store return float64(tot) / float64(pos), nil } -// SampleParquetRowSize samples row size of the parquet file. -func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (float64, int, error) { +// SampleParquetFileProperty samples row size and memory usage of the parquet file. +func SampleParquetFileProperty( + ctx context.Context, + fileMeta SourceFileMeta, + store storage.ExternalStorage, +) ( + avgRowSize float64, + memoryUsage int, + memoryUsageFull int, + err error, +) { totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta) if totalRowCount == 0 || err != nil { - return 0, 0, err + return 0, 0, 0, err } reader, err := store.Open(ctx, fileMeta.Path, nil) if err != nil { - return 0, 0, err + return 0, 0, 0, err } parser, err := NewParquetParserForSampling(ctx, store, reader, fileMeta.Path) if err != nil { //nolint: errcheck reader.Close() - return 0, 0, err + return 0, 0, 0, err } //nolint: errcheck defer parser.Close() @@ -881,7 +897,7 @@ func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store st if errors.Cause(err) == io.EOF { break } - return 0, 0, err + return 0, 0, 0, err } lastRow := parser.LastRow() rowCount++ @@ -892,8 +908,8 @@ func SampleParquetRowSize(ctx context.Context, fileMeta SourceFileMeta, store st } } - avgRowSize := float64(rowSize) / float64(rowCount) - memoryUsage := parser.GetMemoryUage() + avgRowSize = float64(rowSize) / float64(rowCount) + 
memoryUsage, memoryUsageFull = parser.GetMemoryUage() - return avgRowSize, memoryUsage, nil + return avgRowSize, memoryUsage, memoryUsageFull, nil } diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 9686c55621f4c..5473012b2134d 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -1159,7 +1159,7 @@ func testSampleParquetDataSize(t *testing.T, count int) { err = store.WriteFile(ctx, fileName, bf.Bytes()) require.NoError(t, err) - rowSize, _, err := md.SampleParquetRowSize(ctx, md.SourceFileMeta{ + rowSize, _, _, err := md.SampleParquetFileProperty(ctx, md.SourceFileMeta{ Path: fileName, }, store) require.NoError(t, err) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 41dcb5caf9bb2..6967db241ce7c 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -364,7 +364,7 @@ type ParquetParser struct { } // GetMemoryUage estimate the memory usage for this file. 
-func (pp *ParquetParser) GetMemoryUage() int { +func (pp *ParquetParser) GetMemoryUage() (int, int) { // Initialize column reader if pp.dumpers[0].reader == nil { pp.ReadRow() @@ -396,6 +396,18 @@ func (pp *ParquetParser) GetMemoryUage() int { dictUsage := 0 dataPageUsage := 0 readBufferUsage := roundup(bufSizes[0]) + roundup(defaultBufSize) + + readBufferUsageTotal := 0 + meta := pp.readers[0].MetaData() + for _, rg := range meta.RowGroups { + currUsage := 0 + for _, c := range rg.Columns { + currUsage += roundUp(int(c.MetaData.GetTotalCompressedSize())) + } + readBufferUsage = max(readBufferUsage, currUsage) + } + readBufferUsageTotal += roundUp(defaultBufSize) * len(pp.columnNames) + if len(bufSizes) == 3 { dataPageUsage = roundup(bufSizes[1]) } else { @@ -405,7 +417,7 @@ func (pp *ParquetParser) GetMemoryUage() int { } dataPageUsage = roundup(dataPageUsage) } - return (dataPageUsage + dictUsage + readBufferUsage) * len(pp.columnNames) + return (dataPageUsage + dictUsage + readBufferUsage) * len(pp.columnNames), (dataPageUsage+dictUsage)*len(pp.columnNames) + readBufferUsageTotal } func (pp *ParquetParser) setStringData(readNum, col, offset int) { From 8caf76cc8e07db02bc2d83a4610444f6951bb983 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 16:31:10 +0800 Subject: [PATCH 28/93] Fix build --- pkg/lightning/mydump/parquet_parser.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 6967db241ce7c..62cb9f3dce947 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -382,7 +382,7 @@ func (pp *ParquetParser) GetMemoryUage() (int, int) { alloc, ok := pp.alloc.(*sampleAllocator) if !ok { - return 0 + return 0, 0 } bufSizes := alloc.allocated @@ -391,31 +391,31 @@ func (pp *ParquetParser) GetMemoryUage() (int, int) { // read buffer, decompressed dict buffer, compressed buffer, decompressed 
data page buffer, compressed data page buffer... // and compressed buffer is released after decompression. // So we estimate the memory usage as: - // (roundup(decompressed dict buffer) + roundup(decompressed data page buffer) + roundup(read buffer) + roundup(parquet read buffer)) * num_cols + // (roundToPower2(decompressed dict buffer) + roundToPower2(decompressed data page buffer) + roundToPower2(read buffer) + roundToPower2(parquet read buffer)) * num_cols dictUsage := 0 dataPageUsage := 0 - readBufferUsage := roundup(bufSizes[0]) + roundup(defaultBufSize) + readBufferUsage := roundToPower2(bufSizes[0]) + roundToPower2(defaultBufSize) readBufferUsageTotal := 0 meta := pp.readers[0].MetaData() for _, rg := range meta.RowGroups { currUsage := 0 for _, c := range rg.Columns { - currUsage += roundUp(int(c.MetaData.GetTotalCompressedSize())) + currUsage += roundToPower2(int(c.MetaData.GetTotalCompressedSize())) } readBufferUsage = max(readBufferUsage, currUsage) } - readBufferUsageTotal += roundUp(defaultBufSize) * len(pp.columnNames) + readBufferUsageTotal += roundToPower2(defaultBufSize) * len(pp.columnNames) if len(bufSizes) == 3 { - dataPageUsage = roundup(bufSizes[1]) + dataPageUsage = roundToPower2(bufSizes[1]) } else { - dictUsage = roundup(bufSizes[1]) + dictUsage = roundToPower2(bufSizes[1]) for i := 3; i < len(bufSizes); i += 2 { dataPageUsage = max(bufSizes[i], dataPageUsage) } - dataPageUsage = roundup(dataPageUsage) + dataPageUsage = roundToPower2(dataPageUsage) } return (dataPageUsage + dictUsage + readBufferUsage) * len(pp.columnNames), (dataPageUsage+dictUsage)*len(pp.columnNames) + readBufferUsageTotal } @@ -993,7 +993,7 @@ func (sa *sampleAllocator) Reallocate(size int, buf []byte) []byte { return make([]byte, size) } -func roundup(n int) int { +func roundToPower2(n int) int { v := uint(n) v-- v |= v >> 1 From c19cfeb046f5fb10be62dc2a9711f13010bf3840 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 17:21:35 +0800 Subject: [PATCH 
29/93] Fix error --- lightning/pkg/importer/table_import.go | 7 +++---- pkg/lightning/mydump/parquet_parser.go | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 8c86b458496c1..e405c7c18b521 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -805,10 +805,6 @@ ChunkLoop: arenaSize := mydump.GetArenaSize() memoryUsage = (memoryUsage + arenaSize - 1) / arenaSize * arenaSize - memLimiter.Acquire(memoryUsage) - cr.memLimiter = memLimiter - cr.memoryUsage = memoryUsage - // If memory usage is larger than memory limit, set memory usage // to limit to block other file import. if memoryUsage > memLimit { @@ -826,6 +822,9 @@ ChunkLoop: ) } + memLimiter.Acquire(memoryUsage) + cr.memLimiter = memLimiter + cr.memoryUsage = memoryUsage } restoreWorker := rc.regionWorkers.Apply() diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 62cb9f3dce947..d02ffcc2ff058 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -404,7 +404,7 @@ func (pp *ParquetParser) GetMemoryUage() (int, int) { for _, c := range rg.Columns { currUsage += roundToPower2(int(c.MetaData.GetTotalCompressedSize())) } - readBufferUsage = max(readBufferUsage, currUsage) + readBufferUsageTotal = max(readBufferUsageTotal, currUsage) } readBufferUsageTotal += roundToPower2(defaultBufSize) * len(pp.columnNames) From 83747280d5c75b74236dc339f6e1d51eae8761e3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 16:45:24 +0800 Subject: [PATCH 30/93] Add read mode --- DEPS.bzl | 12 ++++---- go.mod | 2 +- go.sum | 18 ++---------- lightning/pkg/importer/chunk_process.go | 3 +- lightning/pkg/importer/dup_detect.go | 2 ++ lightning/pkg/importer/table_import.go | 30 +++++++++++++++----- pkg/lightning/mydump/buddy_allocator.go | 2 ++ pkg/lightning/mydump/loader.go | 33 
+++++++++++++++------- pkg/lightning/mydump/parquet_parser.go | 37 +++++++++++++++---------- pkg/lightning/mydump/region.go | 2 +- 10 files changed, 84 insertions(+), 57 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 0d7aec3cc3120..a9c213ae3be01 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -4121,13 +4121,13 @@ def go_deps(): name = "com_github_joechenrh_arrow_go_v18", build_file_proto_mode = "disable_global", importpath = "github.com/joechenrh/arrow-go/v18", - sha256 = "b326b7a449c131368eb5b9b5c5629ed827c2aa22579abfc786cc7ea2d1c42229", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20241231101551-0f1e67b25b9e", + sha256 = "fd8f195bd73fd66342c6bf66c3ce6977bcc7544a7aab7fc2e2002afbcf95c7a9", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250107060625-e99480fe0ed9", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20241231101551-0f1e67b25b9e.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", ], ) go_repository( diff --git a/go.mod b/go.mod index 6e6e5a149ff02..400b37072c24f 100644 --- a/go.mod +++ b/go.mod @@ -151,7 +151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e +require github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9 require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index 10fd57dd8f2a9..d6748e713e73e 100644 --- a/go.sum +++ b/go.sum @@ -510,22 +510,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113 h1:gaUhamPZuxAcha0gbbg/iN4BO3nLK2OnyQHb1wzrtRo= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241227080927-1a72af1ee113/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241230101731-8134d78bce89 h1:fd4nBmlnLjWPZsCWbV6JbYGH+kEO5lRVyf6F+ZAiOLc= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241230101731-8134d78bce89/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231022458-5c5cd6bd0422 h1:56uGSEyQExsRjh4hnmOmJqDz0vb65pH72Dxsep1Abvw= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231022458-5c5cd6bd0422/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231031812-53833096a527 h1:SPTAgulop3tscwhH83DRM3eA3BnlnJUuMZaS7UtE5a4= -github.com/joechenrh/arrow-go/v18 
v18.0.0-20241231031812-53833096a527/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a h1:TRWmL6aSpSj8MPZLc4cHpGxQUivvhAOEqxKos8cUVIU= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231055212-8bc1d3aa614a/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231081713-7bace6c313a2 h1:VlNIeUAhBh6+3N+Cs9Jn4t77dnp/o3AZvVyPJVC/u0A= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231081713-7bace6c313a2/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231083420-6131e1087c61 h1:WbgGeWZpukO65agrWVlAzvu4PDk4fyEEkeRi5Reh/XQ= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231083420-6131e1087c61/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e h1:7iY3f14jAyh+dwSy92IPyxoYg2BWn46O2uPOR3fpO5k= -github.com/joechenrh/arrow-go/v18 v18.0.0-20241231101551-0f1e67b25b9e/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9 h1:LJGbjOFBrjYubt498ycNLCkXth989t1N9LjWdGuD36U= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index 533cb8ab8cd09..ec8d9011269cb 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -85,7 +85,6 @@ func openParser( ioWorkers *worker.Pool, store storage.ExternalStorage, tblInfo 
*model.TableInfo, - ) (mydump.Parser, error) { blockBufSize := int64(cfg.Mydumper.ReadBlockSize) reader, err := mydump.OpenReader(ctx, &chunk.FileMeta, store, storage.DecompressConfig{ @@ -111,7 +110,7 @@ func openParser( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, cfg.TiDB.SQLMode, reader, blockBufSize, ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, store, reader, chunk.FileMeta.Path) + parser, err = mydump.NewParquetParserWithMeta(ctx, store, reader, chunk.FileMeta.Path, chunk.FileMeta.ParquetMeta) if err != nil { return nil, err } diff --git a/lightning/pkg/importer/dup_detect.go b/lightning/pkg/importer/dup_detect.go index e5167246a93d9..98dc2444104a6 100644 --- a/lightning/pkg/importer/dup_detect.go +++ b/lightning/pkg/importer/dup_detect.go @@ -203,6 +203,8 @@ func (d *dupDetector) addKeysByChunk( adder *duplicate.KeyAdder, chunk *checkpoints.ChunkCheckpoint, ) error { + chunk.FileMeta.ParquetMeta.UseStreaming = true + chunk.FileMeta.ParquetMeta.UseSampleAllocator = false parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core) if err != nil { return err diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index e405c7c18b521..f43c5f1df5239 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -793,17 +793,23 @@ ChunkLoop: setError(err) break } - cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) - if err != nil { - setError(err) - break - } + var memoryUsage int // Limit the concurrency of parquet reader using estimated memory usage. 
if chunk.FileMeta.Type == mydump.SourceTypeParquet { - memoryUsage := tr.tableMeta.DataFiles[0].FileMeta.MemoryUsage arenaSize := mydump.GetArenaSize() - memoryUsage = (memoryUsage + arenaSize - 1) / arenaSize * arenaSize + + memQuota := memLimit / rc.cfg.App.RegionConcurrency / arenaSize * arenaSize + memoryUsageFull := (chunk.FileMeta.ParquetMeta.MemoryUsageFull + arenaSize - 1) / arenaSize * arenaSize + if memQuota > memoryUsageFull { + memoryUsage = memoryUsageFull + chunk.FileMeta.ParquetMeta.UseStreaming = false + } else { + memoryUsage = chunk.FileMeta.ParquetMeta.MemoryUsage + memoryUsage = (memoryUsage + arenaSize - 1) / arenaSize * arenaSize + chunk.FileMeta.ParquetMeta.UseStreaming = true + } + chunk.FileMeta.ParquetMeta.UseSampleAllocator = false // If memory usage is larger than memory limit, set memory usage // to limit to block other file import. @@ -812,6 +818,7 @@ ChunkLoop: zap.String("file", chunk.FileMeta.Path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), + zap.Bool("streaming mode", chunk.FileMeta.ParquetMeta.UseStreaming), ) memoryUsage = memLimit } else { @@ -819,9 +826,18 @@ ChunkLoop: zap.String("file", chunk.FileMeta.Path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), + zap.Bool("streaming mode", chunk.FileMeta.ParquetMeta.UseStreaming), ) } + } + + cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) + if err != nil { + setError(err) + break + } + if chunk.FileMeta.Type == mydump.SourceTypeParquet { memLimiter.Acquire(memoryUsage) cr.memLimiter = memLimiter cr.memoryUsage = memoryUsage diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index ffead69623348..ed04f5739cb44 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ 
-115,6 +115,8 @@ func (ap *arenaPool) adjustGCPercent() { } percent := int(memTotal)*90/(ap.allocated*arenaDefaultSize) - 100 percent = min(percent, 100) / 10 * 10 + percent = max(percent, 5) + old := debug.SetGCPercent(percent) runtime.GC() log.L().Debug("set gc percentage", diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 8aaf25f859c75..8235cedff4974 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -86,6 +86,15 @@ type MDTableMeta struct { IsRowOrdered bool } +// ParquetFileMeta contains some analyzed metadata for a parquet file by MyDumper Loader. +type ParquetFileMeta struct { + Rows int64 // row count + MemoryUsage int // memory usage for streaming mode + MemoryUsageFull int // memory usage for non-streaming mode + UseStreaming bool // whether use streaming mode + UseSampleAllocator bool // whether use sample allocator +} + // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. type SourceFileMeta struct { Path string @@ -100,10 +109,9 @@ type SourceFileMeta struct { // If the file is compressed, RealSize is the estimated uncompressed size. // If the file is parquet, RealSize is the estimated data size after convert // to row oriented storage. - RealSize int64 - Rows int64 // only for parquet - MemoryUsage int // only for parquet - MemoryUsageFull int // only for parquet + RealSize int64 + + ParquetMeta ParquetFileMeta // only for parquet } // NewMDTableMeta creates an Mydumper table meta with specified character set. 
@@ -549,8 +557,7 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size s.sampledParquetRowSizes[tableName], s.sampledParquetMemoryUsage[tableName], s.sampledParquetMemoryUsageFull[tableName], - err = - SampleParquetFileProperty(ctx, info.FileMeta, s.loader.GetStore()) + err = SampleParquetFileProperty(ctx, info.FileMeta, s.loader.GetStore()) if err != nil { logger.Error("fail to sample parquet row size", zap.String("category", "loader"), zap.String("schema", res.Schema), zap.String("table", res.Name), @@ -567,12 +574,14 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size return errors.Trace(err) } info.FileMeta.RealSize = int64(float64(totalRowCount) * s.sampledParquetRowSizes[tableName]) - info.FileMeta.Rows = totalRowCount + info.FileMeta.ParquetMeta.Rows = totalRowCount if m, ok := metric.FromContext(ctx); ok { m.RowsCounter.WithLabelValues(metric.StateTotalRestore, tableName).Add(float64(totalRowCount)) } - info.FileMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] - info.FileMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] + info.FileMeta.ParquetMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] + info.FileMeta.ParquetMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] + info.FileMeta.ParquetMeta.UseStreaming = true + info.FileMeta.ParquetMeta.UseSampleAllocator = false } s.tableDatas = append(s.tableDatas, info) } @@ -878,7 +887,11 @@ func SampleParquetFileProperty( if err != nil { return 0, 0, 0, err } - parser, err := NewParquetParserForSampling(ctx, store, reader, fileMeta.Path) + + parquetMeta := fileMeta.ParquetMeta + parquetMeta.UseStreaming = true + parquetMeta.UseSampleAllocator = true + parser, err := NewParquetParserWithMeta(ctx, store, reader, fileMeta.Path, parquetMeta) if err != nil { //nolint: errcheck reader.Close() diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 
d02ffcc2ff058..838890a3758da 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -1004,13 +1004,13 @@ func roundToPower2(n int) int { return max(int(v+1), 256<<10) } -// NewParquetParserForSampling generates a parquet parser used in sampling. -// The only difference is that we use a special allocator to track the memory allocation. -func NewParquetParserForSampling( +// NewParquetParserWithMeta generates a parquet parser. +func NewParquetParserWithMeta( ctx context.Context, store storage.ExternalStorage, r storage.ReadSeekCloser, path string, + meta ParquetFileMeta, ) (*ParquetParser, error) { wrapper, ok := r.(*parquetFileWrapper) if !ok { @@ -1023,9 +1023,17 @@ func NewParquetParserForSampling( wrapper.InitBuffer(defaultBufSize) } - alloc := &sampleAllocator{} - prop := parquet.NewReaderProperties(alloc) - prop.BufferedStreamEnabled = true + var allocator memory.Allocator + if meta.UseSampleAllocator { + allocator = &sampleAllocator{} + } else { + alloc := &buddyAllocator{} + alloc.Init(0) + allocator = alloc + } + + prop := parquet.NewReaderProperties(allocator) + prop.BufferedStreamEnabled = meta.UseStreaming reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) if err != nil { @@ -1053,15 +1061,16 @@ func NewParquetParserForSampling( subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) subreaders = append(subreaders, reader) for i := 1; i < fileSchema.NumColumns(); i++ { - newWrapper, err := wrapper.Open("") - if err != nil { - return nil, errors.Trace(err) + var newWrapper parquet.ReaderAtSeeker + if meta.UseStreaming { + newWrapper, err = wrapper.Open("") + if err != nil { + return nil, errors.Trace(err) + } + } else { + newWrapper = wrapper } - - prop := parquet.NewReaderProperties(nil) - prop.BufferedStreamEnabled = true reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) - if err != nil { return nil, errors.Trace(err) } 
@@ -1072,7 +1081,7 @@ func NewParquetParserForSampling( readers: subreaders, colMetas: columnMetas, columnNames: columnNames, - alloc: alloc, + alloc: allocator, logger: log.FromContext(ctx), } parser.Init() diff --git a/pkg/lightning/mydump/region.go b/pkg/lightning/mydump/region.go index b5598005473ae..06f6d389bf5a0 100644 --- a/pkg/lightning/mydump/region.go +++ b/pkg/lightning/mydump/region.go @@ -366,7 +366,7 @@ func makeParquetFileRegion( cfg *DataDivideConfig, dataFile FileInfo, ) ([]*TableRegion, []float64, error) { - numberRows := dataFile.FileMeta.Rows + numberRows := dataFile.FileMeta.ParquetMeta.Rows var err error // for safety if numberRows <= 0 { From c3860496b87bab785150217fd7ec561d48826ec2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 10 Jan 2025 10:39:26 +0800 Subject: [PATCH 31/93] Update allocator --- pkg/lightning/mydump/allocator.go | 237 ++++++++++++++++++ pkg/lightning/mydump/buddy_allocator.go | 308 +++++------------------- pkg/lightning/mydump/parquet_parser.go | 9 +- 3 files changed, 297 insertions(+), 257 deletions(-) create mode 100644 pkg/lightning/mydump/allocator.go diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go new file mode 100644 index 0000000000000..5c6d06c68b91f --- /dev/null +++ b/pkg/lightning/mydump/allocator.go @@ -0,0 +1,237 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mydump + +import ( + "context" + "fmt" + "os" + "runtime" + "runtime/debug" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/joechenrh/arrow-go/v18/arrow/memory" + "github.com/pingcap/tidb/pkg/lightning/log" + tidbmemory "github.com/pingcap/tidb/pkg/util/memory" + "go.uber.org/zap" + "golang.org/x/exp/rand" +) + +var ( + maxArenaCount = 0 // maximum arena count + arenaDefaultSize = 512 << 20 // size of each arena + leafSize = 256 << 10 // The smallest block size is 256KB +) + +// SetMaxMemoryUsage set the memory used by parquet reader. +func SetMaxMemoryUsage(size int) { + maxArenaCount = size / arenaDefaultSize +} + +func GetArenaSize() int { + return arenaDefaultSize +} + +type arena interface { + allocate(size int) []byte + free(bs []byte) + allocated() int64 + reset() + freeAll() +} + +// Convert slice to an uintptr. This value is used as key in map. +func unsafeGetblkAddr(slice []byte) uintptr { + return uintptr(unsafe.Pointer(&slice[0])) +} + +var ( + arenas []atomic.Value + numArenas atomic.Int32 + lock sync.Mutex + + ctx context.Context + cancel context.CancelFunc + + allocatedOutside atomic.Int64 + allocatedOutsideNum atomic.Int64 +) + +func initArenas() { + arenas = make([]atomic.Value, maxArenaCount) + initNum := min(8, maxArenaCount) + for i := 0; i < initNum; i++ { + a := &buddyAllocator{} + a.init(arenaDefaultSize) + arenas[i].Store(a) + } + numArenas.Store(int32(initNum)) + + ctx, cancel = context.WithCancel(context.Background()) + go getStatus() +} + +func freeArenas() { + cancel() + for i := 0; i < len(arenas); i++ { + if v := arenas[i].Load(); v != nil { + a := v.(arena) + a.freeAll() + a.reset() + a = nil + // store an empty buddyAllocator to release the memory + arenas[i].Store(&buddyAllocator{}) + } + } + numArenas.Store(0) + arenas = nil +} + +func adjustGCPercent() { + gogc := os.Getenv("GOGC") + memTotal, err := tidbmemory.MemTotal() + if gogc == "" && err == nil { + if numArenas.Load() == 0 { + debug.SetGCPercent(100) 
+ return + } + percent := int(memTotal)*90/(int(numArenas.Load())*arenaDefaultSize) - 100 + percent = min(percent, 100) / 10 * 10 + percent = max(percent, 5) + + old := debug.SetGCPercent(percent) + runtime.GC() + log.L().Debug("set gc percentage", + zap.Int("old", old), + zap.Int("new", percent), + zap.Int("total memory", int(memTotal)), + zap.Int("allocated memory", int(numArenas.Load())*arenaDefaultSize), + ) + } +} + +type defaultAllocator struct { + allocatedBuf sync.Map +} + +func (alloc *defaultAllocator) Init(_ int) { + +} + +func getStatus() { + tick := time.NewTicker(2 * time.Second) + defer tick.Stop() + for { + select { + case <-tick.C: + l := int(numArenas.Load()) + var totalAllocated int64 + for i := 0; i < l; i++ { + if a := arenas[i].Load().(arena); a != nil { + totalAllocated += a.allocated() + } + } + + fmt.Printf("[buddyAllocator] Inside the allocator: %d MiB, outside the allocator: %d MiB(%d blocks)\n", + int(totalAllocated)/1024/1024, + int(allocatedOutsideNum.Load()), + int(allocatedOutside.Load())/1024/1024, + ) + case <-ctx.Done(): + return + } + } +} + +func (alloc *defaultAllocator) Allocate(size int) []byte { +START: + // start from a random arena to avoid contention + l := int(numArenas.Load()) + idx := rand.Intn(l) + for i := 0; i < l; i++ { + a := arenas[idx].Load().(arena) + if buf := a.allocate(size); buf != nil { + alloc.allocatedBuf.Store(unsafeGetblkAddr(buf), idx) + return buf + } + idx = (idx + 1) % l + } + + // Can't create new arena, use make to allocate memory + if l == maxArenaCount { + allocatedOutside.Add(int64(size)) + allocatedOutsideNum.Add(1) + return make([]byte, size) + } + + // Create some new arenas, if someone else has created the arena, just use it. 
+ lock.Lock() + defer lock.Unlock() + for i := 0; i < min(maxArenaCount-l, 2); i++ { + if arenas[l+i].Load() == nil { + a := &buddyAllocator{} + a.init(arenaDefaultSize) + arenas[l+i].Store(a) + numArenas.Add(1) + } + } + adjustGCPercent() + + idx = int(numArenas.Load()) - 1 + a := arenas[idx].Load().(arena) + if buf := a.allocate(size); buf != nil { + alloc.allocatedBuf.Store(unsafeGetblkAddr(buf), idx) + return buf + } + + // This should rarely happen, goto START to try again + goto START +} + +func (alloc *defaultAllocator) Free(buf []byte) { + if buf == nil || cap(buf) == 0 { + return + } + addr := unsafeGetblkAddr(buf[:1]) + arenaID, ok := alloc.allocatedBuf.Load(addr) + if !ok { + return + } + + arenas[arenaID.(int)].Load().(arena).free(buf) + alloc.allocatedBuf.Delete(addr) +} + +func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { + alloc.Free(buf) + return alloc.Allocate(size) +} + +var once sync.Once + +func GetDefaultAllocator() memory.Allocator { + once.Do(func() { + initArenas() + }) + return &defaultAllocator{} +} + +// FreeMemory free all the memory allocated for arenas. +func FreeMemory() { + freeArenas() +} diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index ed04f5739cb44..2f34f5908859b 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -15,135 +15,11 @@ package mydump import ( - "context" "fmt" - "os" - "runtime" - "runtime/debug" "sync" "sync/atomic" - "time" - "unsafe" - - "github.com/pingcap/tidb/pkg/lightning/log" - "github.com/pingcap/tidb/pkg/util/memory" - "go.uber.org/zap" -) - -var ( - maxArenaCount = 0 // maximum arena count - arenaDefaultSize = 512 << 20 // size of each arena - leafSize = 256 << 10 // The smallest block size is 256KB ) -// SetMaxMemoryUsage set the memory used by parquet reader. 
-func SetMaxMemoryUsage(size int) { - maxArenaCount = size / arenaDefaultSize -} - -func GetArenaSize() int { - return arenaDefaultSize -} - -// arenaPool is used to cache and reuse arenas -type arenaPool struct { - arenas chan *internalAllocator - allocated int - lock sync.Mutex -} - -func (ap *arenaPool) get() *internalAllocator { - // First try to get cached arena - select { - case a := <-ap.arenas: - return a - default: - } - - ap.lock.Lock() - defer ap.lock.Unlock() - - // Create a new one and return - if ap.allocated < maxArenaCount { - ap.allocated++ - bd := &internalAllocator{} - bd.init(arenaDefaultSize) - ap.adjustGCPercent() - return bd - } - - // We can't create new arena, return nil - return nil -} - -func (ap *arenaPool) put(a *internalAllocator) { - ap.lock.Lock() - defer ap.lock.Unlock() - - // discard it if necessary - if ap.allocated > maxArenaCount { - a.bufInfo = nil - a.buffer = nil - ap.allocated-- - ap.adjustGCPercent() - return - } - - ap.arenas <- a -} - -func (ap *arenaPool) free() { - ap.lock.Lock() - defer ap.lock.Unlock() - - ap.allocated = 0 - for len(ap.arenas) > 0 { - a := <-ap.arenas - a.bufInfo = nil - a.buffer = nil - } - ap.adjustGCPercent() -} - -func (ap *arenaPool) adjustGCPercent() { - gogc := os.Getenv("GOGC") - memTotal, err := memory.MemTotal() - if gogc == "" && err == nil { - if ap.allocated == 0 { - debug.SetGCPercent(100) - return - } - percent := int(memTotal)*90/(ap.allocated*arenaDefaultSize) - 100 - percent = min(percent, 100) / 10 * 10 - percent = max(percent, 5) - - old := debug.SetGCPercent(percent) - runtime.GC() - log.L().Debug("set gc percentage", - zap.Int("old", old), - zap.Int("new", percent), - zap.Int("total memory", int(memTotal)), - zap.Int("allocated memory", ap.allocated*arenaDefaultSize), - ) - } -} - -var pool = &arenaPool{ - allocated: 0, - arenas: make(chan *internalAllocator, 256), -} - -// FreeMemory free all the memory allocated for arenas. 
-// TODO(joechenrh): check if there are anyone using the arenas. -func FreeMemory() { - pool.free() -} - -// Convert slice to an uintptr. This value is used as key in map. -func unsafeGetblkAddr(slice []byte) uintptr { - return uintptr(unsafe.Pointer(&slice[0])) -} - func roundUp(n, sz int) int { return (n + sz - 1) / sz * sz } @@ -257,21 +133,23 @@ func (binfo *bufferInfo) pop() int { } // buffer is represented as an offset. -type internalAllocator struct { +type buddyAllocator struct { buffer []byte bufInfo []bufferInfo nLayers int maxLayer int - allocated map[uintptr]int + allocatedBuf map[uintptr]int allocatedBytes atomic.Int64 unavailable int total int + + lock sync.Mutex } // Find the layer of the block at offset -func (b *internalAllocator) layer(offset int) int { +func (b *buddyAllocator) layer(offset int) int { for k := 0; k < b.maxLayer; k++ { if bitIsSet(b.bufInfo[k+1].split, blkIndex(k+1, offset)) { return k @@ -281,7 +159,10 @@ func (b *internalAllocator) layer(offset int) int { } // Allocate nbytes, but malloc won't return anything smaller than LeafSize -func (b *internalAllocator) allocateInternal(nbytes int) []byte { +func (b *buddyAllocator) allocate(nbytes int) []byte { + b.lock.Lock() + defer b.lock.Unlock() + // Find a free block >= nbytes, starting with lowest layer possible fl := firstLayer(nbytes) l := fl @@ -312,16 +193,28 @@ func (b *internalAllocator) allocateInternal(nbytes int) []byte { buf := b.buffer[offset : offset+nbytes] b.allocatedBytes.Add(int64(blkSize(l))) - b.allocated[unsafeGetblkAddr(buf)] = offset + addr := unsafeGetblkAddr(buf) + if off, ok := b.allocatedBuf[addr]; ok { + fmt.Println("duplicated allocation", addr, offset, off) + panic("duplicated allocation") + } + b.allocatedBuf[addr] = offset + b.sanityCheck() return buf } // free memory marked by p, which was earlier allocated using Malloc -func (b *internalAllocator) freeInternal(bs []byte) { - bs = bs[:1] +func (b *buddyAllocator) free(bs []byte) { + b.lock.Lock() + 
defer b.lock.Unlock() + + if len(bs) == 0 { + bs = bs[:1] + } + addr := unsafeGetblkAddr(bs) - offset, ok := b.allocated[addr] + offset, ok := b.allocatedBuf[addr] if !ok { return } @@ -329,7 +222,7 @@ func (b *internalAllocator) freeInternal(bs []byte) { l := b.layer(offset) b.allocatedBytes.Add(-int64(blkSize(l))) - delete(b.allocated, addr) + delete(b.allocatedBuf, addr) // Start merge from layer l for ; l < b.maxLayer; l++ { @@ -367,14 +260,34 @@ func (b *internalAllocator) freeInternal(bs []byte) { b.sanityCheck() } -func (b *internalAllocator) freeAll() { - for _, offset := range b.allocated { - b.freeInternal(b.buffer[offset:]) +func (b *buddyAllocator) reset() { + for i := range b.bufInfo { + b.bufInfo[i].alloc = nil + b.bufInfo[i].split = nil + b.bufInfo[i].canAllocate = nil } + b.bufInfo = nil + b.buffer = nil +} - if len(b.allocated) != 0 || b.allocatedBytes.Load() != 0 { +func (b *buddyAllocator) freeAll() { + if len(b.allocatedBuf) != 0 { + fmt.Println("allocatedBuf", len(b.allocatedBuf)) + } + + for _, offset := range b.allocatedBuf { + b.free(b.buffer[offset:]) + } + + if len(b.allocatedBuf) != 0 || b.allocatedBytes.Load() != 0 { panic("freeAll error") } + + b.sanityCheck() +} + +func (b *buddyAllocator) allocated() int64 { + return b.allocatedBytes.Load() } /* @@ -384,7 +297,7 @@ func (b *internalAllocator) freeAll() { * | | | * |--------|---------|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|--------|--------| */ -func (b *internalAllocator) markAllocated(start, end int) { +func (b *buddyAllocator) markAllocated(start, end int) { for k := 0; k < b.nLayers; k++ { leftBi := blkIndex(k, start) rightBi := blkIndexNext(k, end) @@ -397,7 +310,7 @@ func (b *internalAllocator) markAllocated(start, end int) { } // Mark the range outside [start, end) as allocated -func (b *internalAllocator) markUnavailable(start, end int) int { +func (b *buddyAllocator) markUnavailable(start, end int) int { heapSize := blkSize(b.maxLayer) unavailableEnd := roundUp(heapSize-end, 
leafSize) unavailableStart := roundUp(start, leafSize) @@ -408,7 +321,7 @@ func (b *internalAllocator) markUnavailable(start, end int) int { // If a block is marked as allocated and its buddy is free, put the // buddy on the free list at layer l. -func (b *internalAllocator) initFreePair(l, bi int) (free int) { +func (b *buddyAllocator) initFreePair(l, bi int) (free int) { buddy := bi + 1 if bi%2 == 1 { buddy = bi - 1 @@ -435,7 +348,7 @@ func (b *internalAllocator) initFreePair(l, bi int) (free int) { * | | | | * |xxxxxxxx|xxxxxxxx|x-------|--------|--------|------xx|xxxxxxxx|xxxxxxxx| */ -func (b *internalAllocator) initFree(left, right int) int { +func (b *buddyAllocator) initFree(left, right int) int { free := 0 for l := 0; l < b.maxLayer; l++ { @@ -455,7 +368,7 @@ func (b *internalAllocator) initFree(left, right int) int { } // Initialize the buddy allocator, assert totalSize is the power of 2. -func (b *internalAllocator) init(totalSize int) { +func (b *buddyAllocator) init(totalSize int) { log2 := func(n int) int { k := 0 for n > 1 { @@ -504,10 +417,10 @@ func (b *internalAllocator) init(totalSize int) { panic("Initialize allocator failed") } - b.allocated = make(map[uintptr]int, totalSize/leafSize) + b.allocatedBuf = make(map[uintptr]int, totalSize/leafSize) } -func (b *internalAllocator) sanityCheck() { +func (b *buddyAllocator) sanityCheck() { free := 0 for _, binfo := range b.bufInfo { blkSize := blkSize(binfo.l) @@ -519,7 +432,7 @@ func (b *internalAllocator) sanityCheck() { } alloc := 0 - for _, offset := range b.allocated { + for _, offset := range b.allocatedBuf { alloc += blkSize(b.layer(offset)) } if alloc != int(b.allocatedBytes.Load()) { @@ -530,108 +443,3 @@ func (b *internalAllocator) sanityCheck() { panic("Sanity check failed") } } - -type buddyAllocator struct { - arenas []*internalAllocator - allocated map[uintptr]int - lock sync.Mutex - - allocatedOutside atomic.Int64 - allocatedOutsideNum atomic.Int64 - - ctx context.Context - cancel 
context.CancelFunc -} - -func (b *buddyAllocator) Init(_ int) { - b.allocated = make(map[uintptr]int, maxArenaCount) - - ctx, cancel := context.WithCancel(context.Background()) - go func() { - tick := time.NewTicker(2 * time.Second) - defer tick.Stop() - for { - select { - case <-tick.C: - var m runtime.MemStats - runtime.ReadMemStats(&m) - - fmt.Printf("[buddyAllocator] Inside the allocator: %d MiB(%d blocks), outside the allocator: %d MiB(%d blocks)\n", - int(b.Allocated())/1024/1024, len(b.allocated), - int(b.allocatedOutsideNum.Load()), int(b.allocatedOutside.Load())/1024/1024, - ) - case <-ctx.Done(): - return - } - } - }() - b.ctx = ctx - b.cancel = cancel -} - -func (b *buddyAllocator) Allocate(size int) []byte { - b.lock.Lock() - defer b.lock.Unlock() - - for i, arena := range b.arenas { - buf := arena.allocateInternal(size) - if buf != nil { - b.allocated[unsafeGetblkAddr(buf)] = i - return buf - } - } - - if arena := pool.get(); arena != nil { - b.arenas = append(b.arenas, arena) - buf := arena.allocateInternal(size) - b.allocated[unsafeGetblkAddr(buf)] = len(b.arenas) - 1 - return buf - } - - b.allocatedOutside.Add(int64(size)) - b.allocatedOutsideNum.Add(1) - return make([]byte, size) -} - -func (b *buddyAllocator) Free(bs []byte) { - b.lock.Lock() - defer b.lock.Unlock() - - if bs == nil || cap(bs) == 0 { - return - } - bs = bs[:1] - addr := unsafeGetblkAddr(bs) - arenaID, ok := b.allocated[addr] - if !ok { - return - } - - b.arenas[arenaID].freeInternal(bs) - delete(b.allocated, addr) -} - -func (b *buddyAllocator) Reallocate(size int, bs []byte) []byte { - b.Free(bs) - return b.Allocate(size) -} - -func (b *buddyAllocator) Allocated() int64 { - b.lock.Lock() - defer b.lock.Unlock() - - allocatedBytes := 0 - for _, arena := range b.arenas { - allocatedBytes += int(arena.allocatedBytes.Load()) - } - return int64(allocatedBytes) -} - -// Close return the allocated memory to the pool -func (b *buddyAllocator) Close() { - b.cancel() - for _, arena := 
range b.arenas { - arena.freeAll() - pool.put(arena) - } -} diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 838890a3758da..1ea2572254c3e 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -764,9 +764,6 @@ func (pp *ParquetParser) Close() error { return errors.Trace(err) } } - if buddy, ok := pp.alloc.(*buddyAllocator); ok { - buddy.Close() - } return nil } @@ -923,8 +920,7 @@ func NewParquetParser( wrapper.InitBuffer(defaultBufSize) } - allocator := &buddyAllocator{} - allocator.Init(0) + allocator := GetDefaultAllocator() prop := parquet.NewReaderProperties(allocator) prop.BufferedStreamEnabled = true @@ -1027,8 +1023,7 @@ func NewParquetParserWithMeta( if meta.UseSampleAllocator { allocator = &sampleAllocator{} } else { - alloc := &buddyAllocator{} - alloc.Init(0) + alloc := GetDefaultAllocator() allocator = alloc } From e8af41aa3d3342b67ae3313fef4151a6ea15c75a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 10 Jan 2025 10:49:17 +0800 Subject: [PATCH 32/93] Add comment --- pkg/lightning/mydump/allocator.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 5c6d06c68b91f..187e93b3f0b38 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -43,6 +43,7 @@ func SetMaxMemoryUsage(size int) { maxArenaCount = size / arenaDefaultSize } +// GetArenaSize return the default arena size func GetArenaSize() int { return arenaDefaultSize } @@ -224,6 +225,7 @@ func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { var once sync.Once +// GetDefaultAllocator get a default allocator func GetDefaultAllocator() memory.Allocator { once.Do(func() { initArenas() From 32cba87c6209f55004d47ea7f39073367510fe99 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 17 Jan 2025 14:13:51 +0800 Subject: [PATCH 33/93] Update code --- 
lightning/pkg/importer/table_import.go | 10 +- pkg/lightning/backend/kv/sql2kv.go | 1 + pkg/lightning/mydump/allocator.go | 297 +++++++++++++---------- pkg/lightning/mydump/buddy_allocator.go | 18 +- pkg/lightning/mydump/loader.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 62 +++-- pkg/lightning/mydump/simple_allocator.go | 209 ++++++++++++++++ 7 files changed, 429 insertions(+), 170 deletions(-) create mode 100644 pkg/lightning/mydump/simple_allocator.go diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index f43c5f1df5239..9cee3fe4f960f 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -73,7 +73,7 @@ func setMemoryLimitForParquet(percent int) { } memLimit = int(memTotal) * min(percent, 90) / 100 memLimiter = membuf.NewLimiter(memLimit) - mydump.SetMaxMemoryUsage(memLimit) + mydump.InitializeGlobalArena(memLimit) } // TableImporter is a helper struct to import a table. @@ -109,7 +109,7 @@ func NewTableImporter( logger log.Logger, ) (*TableImporter, error) { idAlloc := kv.NewPanickingAllocatorsWithBase(tableInfo.Core.SepAutoInc(), cp.AutoRandBase, cp.AutoIncrBase, cp.AutoRowIDBase) - tbl, err := tables.TableFromMeta(idAlloc, tableInfo.Core) + tbl, err := tables.TableLightningFromMeta(idAlloc, tableInfo.Core) if err != nil { return nil, errors.Annotatef(err, "failed to tables.TableFromMeta %s", tableName) } @@ -800,13 +800,11 @@ ChunkLoop: arenaSize := mydump.GetArenaSize() memQuota := memLimit / rc.cfg.App.RegionConcurrency / arenaSize * arenaSize - memoryUsageFull := (chunk.FileMeta.ParquetMeta.MemoryUsageFull + arenaSize - 1) / arenaSize * arenaSize - if memQuota > memoryUsageFull { - memoryUsage = memoryUsageFull + if memQuota > chunk.FileMeta.ParquetMeta.MemoryUsageFull { + memoryUsage = chunk.FileMeta.ParquetMeta.MemoryUsageFull chunk.FileMeta.ParquetMeta.UseStreaming = false } else { memoryUsage = chunk.FileMeta.ParquetMeta.MemoryUsage - memoryUsage = 
(memoryUsage + arenaSize - 1) / arenaSize * arenaSize chunk.FileMeta.ParquetMeta.UseStreaming = true } chunk.FileMeta.ParquetMeta.UseSampleAllocator = false diff --git a/pkg/lightning/backend/kv/sql2kv.go b/pkg/lightning/backend/kv/sql2kv.go index 2457db832ad81..37a135d0187d7 100644 --- a/pkg/lightning/backend/kv/sql2kv.go +++ b/pkg/lightning/backend/kv/sql2kv.go @@ -239,6 +239,7 @@ func (kvcodec *tableKVEncoder) Encode(row []types.Datum, } if common.TableHasAutoRowID(kvcodec.table.Meta()) { + var value types.Datum rowValue := rowID j := columnPermutation[len(kvcodec.Columns)] if j >= 0 && j < len(row) { diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 187e93b3f0b38..4df793cf6403e 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -29,25 +29,47 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" - "golang.org/x/exp/rand" ) +/* + * There are two usage modes for the memory allocation: + * 1. Call `GetDefaultAllocator` directly to get an allocator. + * 2. Call `InitializeGlobalArena` to initialize the global arena pool, + * so the arena allocated in this node can be reused by subsequent allocation. + * User should remember to call `FreeMemory` after the execution is completed. + */ + var ( maxArenaCount = 0 // maximum arena count - arenaDefaultSize = 512 << 20 // size of each arena - leafSize = 256 << 10 // The smallest block size is 256KB + defaultArenaSize = 256 << 20 // size of each arena + + // AllocSize returns actual allocated size in arena + AllocSize func(int) int + + // GetArena creates a new arena + GetArena func(int) arena ) -// SetMaxMemoryUsage set the memory used by parquet reader. 
-func SetMaxMemoryUsage(size int) { - maxArenaCount = size / arenaDefaultSize +func init() { + AllocSize = simpleGetAllocationSize + GetArena = getSimpleAllocator +} + +// Get the address of a buffer, return 0 if the buffer is nil +func addressOf(buf []byte) uintptr { + if buf == nil || cap(buf) == 0 { + return 0 + } + buf = buf[:1] + return uintptr(unsafe.Pointer(&buf[0])) } // GetArenaSize return the default arena size func GetArenaSize() int { - return arenaDefaultSize + return defaultArenaSize } +// arena is the interface of single allocator type arena interface { allocate(size int) []byte free(bs []byte) @@ -56,62 +78,21 @@ type arena interface { freeAll() } -// Convert slice to an uintptr. This value is used as key in map. -func unsafeGetblkAddr(slice []byte) uintptr { - return uintptr(unsafe.Pointer(&slice[0])) -} - -var ( - arenas []atomic.Value - numArenas atomic.Int32 +type globalArenaPool struct { + arenas chan arena + allocated int lock sync.Mutex - - ctx context.Context - cancel context.CancelFunc - - allocatedOutside atomic.Int64 - allocatedOutsideNum atomic.Int64 -) - -func initArenas() { - arenas = make([]atomic.Value, maxArenaCount) - initNum := min(8, maxArenaCount) - for i := 0; i < initNum; i++ { - a := &buddyAllocator{} - a.init(arenaDefaultSize) - arenas[i].Store(a) - } - numArenas.Store(int32(initNum)) - - ctx, cancel = context.WithCancel(context.Background()) - go getStatus() } -func freeArenas() { - cancel() - for i := 0; i < len(arenas); i++ { - if v := arenas[i].Load(); v != nil { - a := v.(arena) - a.freeAll() - a.reset() - a = nil - // store an empty buddyAllocator to release the memory - arenas[i].Store(&buddyAllocator{}) - } - } - numArenas.Store(0) - arenas = nil -} - -func adjustGCPercent() { +func (ga *globalArenaPool) adjustGCPercent() { gogc := os.Getenv("GOGC") memTotal, err := tidbmemory.MemTotal() if gogc == "" && err == nil { - if numArenas.Load() == 0 { + if ga.allocated == 0 { debug.SetGCPercent(100) return } - percent := 
int(memTotal)*90/(int(numArenas.Load())*arenaDefaultSize) - 100 + percent := int(memTotal)*90/(ga.allocated*defaultArenaSize) - 100 percent = min(percent, 100) / 10 * 10 percent = max(percent, 5) @@ -121,101 +102,138 @@ func adjustGCPercent() { zap.Int("old", old), zap.Int("new", percent), zap.Int("total memory", int(memTotal)), - zap.Int("allocated memory", int(numArenas.Load())*arenaDefaultSize), + zap.Int("allocated memory", ga.allocated*defaultArenaSize), ) } } -type defaultAllocator struct { - allocatedBuf sync.Map +func (ga *globalArenaPool) get() arena { + // First try to get cached arena + select { + case a := <-ga.arenas: + return a + default: + } + + ga.lock.Lock() + defer ga.lock.Unlock() + + // Create a new one and return + if ga.allocated < maxArenaCount { + ga.allocated++ + bd := GetArena(defaultArenaSize) + ga.adjustGCPercent() + return bd + } + + // We can't create new arena, return nil + return nil +} + +func (ga *globalArenaPool) put(a arena) { + ga.lock.Lock() + defer ga.lock.Unlock() + + // discard it if necessary + if ga.allocated > maxArenaCount { + a.freeAll() + a.reset() + ga.adjustGCPercent() + return + } + + ga.arenas <- a } -func (alloc *defaultAllocator) Init(_ int) { +func (ga *globalArenaPool) free() { + ga.lock.Lock() + defer ga.lock.Unlock() + + ga.allocated = 0 + for len(ga.arenas) > 0 { + a := <-ga.arenas + a.freeAll() + a.reset() + } + ga.adjustGCPercent() +} + +var globalPool *globalArenaPool + +type defaultAllocator struct { + arenas []arena + allocatedBuf map[uintptr]int + + allocatedOutside atomic.Int64 + allocatedOutsideNum atomic.Int64 + cancel context.CancelFunc + wg sync.WaitGroup } -func getStatus() { - tick := time.NewTicker(2 * time.Second) - defer tick.Stop() - for { - select { - case <-tick.C: - l := int(numArenas.Load()) - var totalAllocated int64 - for i := 0; i < l; i++ { - if a := arenas[i].Load().(arena); a != nil { - totalAllocated += a.allocated() +func (alloc *defaultAllocator) init() { + alloc.allocatedBuf = 
make(map[uintptr]int, 8) + ctx, cancel := context.WithCancel(context.Background()) + alloc.wg.Add(1) + go func() { + defer alloc.wg.Done() + tick := time.NewTicker(2 * time.Second) + defer tick.Stop() + for { + select { + case <-tick.C: + var m runtime.MemStats + runtime.ReadMemStats(&m) + + num := 0 + for _, a := range alloc.arenas { + num += int(a.allocated()) } - } - fmt.Printf("[buddyAllocator] Inside the allocator: %d MiB, outside the allocator: %d MiB(%d blocks)\n", - int(totalAllocated)/1024/1024, - int(allocatedOutsideNum.Load()), - int(allocatedOutside.Load())/1024/1024, - ) - case <-ctx.Done(): - return + fmt.Printf("[Allocator] num arenas = %d, num blocks = %d, outside the allocator: %d MiB(%d blocks)\n", + len(alloc.arenas), num, + int(alloc.allocatedOutsideNum.Load()), + int(alloc.allocatedOutside.Load())/1024/1024, + ) + case <-ctx.Done(): + return + } } - } + }() + alloc.cancel = cancel } func (alloc *defaultAllocator) Allocate(size int) []byte { -START: - // start from a random arena to avoid contention - l := int(numArenas.Load()) - idx := rand.Intn(l) - for i := 0; i < l; i++ { - a := arenas[idx].Load().(arena) + for i, a := range alloc.arenas { if buf := a.allocate(size); buf != nil { - alloc.allocatedBuf.Store(unsafeGetblkAddr(buf), idx) + alloc.allocatedBuf[addressOf(buf)] = i return buf } - idx = (idx + 1) % l - } - - // Can't create new arena, use make to allocate memory - if l == maxArenaCount { - allocatedOutside.Add(int64(size)) - allocatedOutsideNum.Add(1) - return make([]byte, size) } - // Create some new arenas, if someone else has created the arena, just use it. - lock.Lock() - defer lock.Unlock() - for i := 0; i < min(maxArenaCount-l, 2); i++ { - if arenas[l+i].Load() == nil { - a := &buddyAllocator{} - a.init(arenaDefaultSize) - arenas[l+i].Store(a) - numArenas.Add(1) + // If global pool is initialized, get arena from the pool. + // Otherwise, we just create a new one. 
+ var na arena + if globalPool != nil { + if na = globalPool.get(); na == nil { + return make([]byte, size) } + } else { + na = GetArena(defaultArenaSize) } - adjustGCPercent() - idx = int(numArenas.Load()) - 1 - a := arenas[idx].Load().(arena) - if buf := a.allocate(size); buf != nil { - alloc.allocatedBuf.Store(unsafeGetblkAddr(buf), idx) - return buf - } - - // This should rarely happen, goto START to try again - goto START + buf := na.allocate(size) + alloc.allocatedBuf[addressOf(buf)] = len(alloc.arenas) + alloc.arenas = append(alloc.arenas, na) + return buf } func (alloc *defaultAllocator) Free(buf []byte) { - if buf == nil || cap(buf) == 0 { - return + addr := addressOf(buf[:1]) + if arenaID, ok := alloc.allocatedBuf[addr]; ok { + alloc.arenas[arenaID].free(buf) + delete(alloc.allocatedBuf, addr) } - addr := unsafeGetblkAddr(buf[:1]) - arenaID, ok := alloc.allocatedBuf.Load(addr) - if !ok { - return - } - - arenas[arenaID.(int)].Load().(arena).free(buf) - alloc.allocatedBuf.Delete(addr) } func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { @@ -223,17 +241,40 @@ func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { return alloc.Allocate(size) } -var once sync.Once +func (alloc *defaultAllocator) Close() { + alloc.cancel() + alloc.wg.Wait() + + // If global pool is initialized, return allocated arena to the pool. + if globalPool != nil { + for _, a := range alloc.arenas { + a.freeAll() + a.reset() + globalPool.put(a) + } + } + + alloc.arenas = nil +} // GetDefaultAllocator get a default allocator func GetDefaultAllocator() memory.Allocator { - once.Do(func() { - initArenas() - }) - return &defaultAllocator{} + a := &defaultAllocator{} + a.init() + return a +} + +// InitializeGlobalArena initialize a global arena pool. +// If you call this function, remember to call FreeMemory. 
+func InitializeGlobalArena(size int) { + maxArenaCount = size / defaultArenaSize + globalPool = &globalArenaPool{} + globalPool.arenas = make(chan arena, maxArenaCount) } // FreeMemory free all the memory allocated for arenas. func FreeMemory() { - freeArenas() + if globalPool != nil { + globalPool.free() + } } diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index 2f34f5908859b..5ee79ff252bdd 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -18,8 +18,22 @@ import ( "fmt" "sync" "sync/atomic" + + "github.com/pingcap/tidb/pkg/util/mathutil" ) +const leafSize = 256 << 10 // The smallest block size is 256KB + +func buddyGetAllocationSize(size int) int { + return int(mathutil.NextPowerOfTwo(int64(size))) +} + +func getBuddyAllocator(size int) arena { + a := &buddyAllocator{} + a.init(size) + return a +} + func roundUp(n, sz int) int { return (n + sz - 1) / sz * sz } @@ -193,7 +207,7 @@ func (b *buddyAllocator) allocate(nbytes int) []byte { buf := b.buffer[offset : offset+nbytes] b.allocatedBytes.Add(int64(blkSize(l))) - addr := unsafeGetblkAddr(buf) + addr := addressOf(buf) if off, ok := b.allocatedBuf[addr]; ok { fmt.Println("duplicated allocation", addr, offset, off) panic("duplicated allocation") @@ -213,7 +227,7 @@ func (b *buddyAllocator) free(bs []byte) { bs = bs[:1] } - addr := unsafeGetblkAddr(bs) + addr := addressOf(bs) offset, ok := b.allocatedBuf[addr] if !ok { return diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 8235cedff4974..26ccb9e898727 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -922,7 +922,7 @@ func SampleParquetFileProperty( } avgRowSize = float64(rowSize) / float64(rowCount) - memoryUsage, memoryUsageFull = parser.GetMemoryUage() + memoryUsage, memoryUsageFull = parser.GetMemoryUsage() return avgRowSize, memoryUsage, memoryUsageFull, nil } diff --git 
a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 1ea2572254c3e..959e56a8a08ea 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -364,7 +364,7 @@ type ParquetParser struct { } // GetMemoryUage estimate the memory usage for this file. -func (pp *ParquetParser) GetMemoryUage() (int, int) { +func (pp *ParquetParser) GetMemoryUsage() (int, int) { // Initialize column reader if pp.dumpers[0].reader == nil { pp.ReadRow() @@ -386,44 +386,47 @@ func (pp *ParquetParser) GetMemoryUage() (int, int) { } bufSizes := alloc.allocated - // We have collected all the allocation for one column chunk. - // The allocation order are: - // read buffer, decompressed dict buffer, compressed buffer, decompressed data page buffer, compressed data page buffer... - // and compressed buffer is released after decompression. - // So we estimate the memory usage as: - // (roundToPower2(decompressed dict buffer) + roundToPower2(decompressed data page buffer) + roundToPower2(read buffer) + roundToPower2(parquet read buffer)) * num_cols + /* + * We have collected all the allocations, and the allocation order are: + * read buffer(repeat n times), decompressed dict buffer, compressed buffer, decompressed data page buffer, compressed data page buffer, ... 
+ * since the compressed buffer is released after decompression, we estimate the memory usage as: + * (AllocSize(decompressed dict buffer) + AllocSize(decompressed data page buffer) + AllocSize(read buffer) + AllocSize(parquet read buffer)) * num_cols + */ + numColumns := len(pp.columnNames) dictUsage := 0 dataPageUsage := 0 - readBufferUsage := roundToPower2(bufSizes[0]) + roundToPower2(defaultBufSize) + readBufferUsageStream := (AllocSize(bufSizes[0]) + AllocSize(defaultBufSize)) * numColumns - readBufferUsageTotal := 0 + readBufferUsageNonStream := 0 meta := pp.readers[0].MetaData() for _, rg := range meta.RowGroups { currUsage := 0 for _, c := range rg.Columns { - currUsage += roundToPower2(int(c.MetaData.GetTotalCompressedSize())) + currUsage += AllocSize(int(c.MetaData.GetTotalCompressedSize())) } - readBufferUsageTotal = max(readBufferUsageTotal, currUsage) + readBufferUsageNonStream = max(readBufferUsageNonStream, currUsage) } - readBufferUsageTotal += roundToPower2(defaultBufSize) * len(pp.columnNames) + readBufferUsageNonStream += AllocSize(defaultBufSize) * len(pp.columnNames) - if len(bufSizes) == 3 { - dataPageUsage = roundToPower2(bufSizes[1]) - } else { - dictUsage = roundToPower2(bufSizes[1]) - for i := 3; i < len(bufSizes); i += 2 { - dataPageUsage = max(bufSizes[i], dataPageUsage) - } - dataPageUsage = roundToPower2(dataPageUsage) + for i := numColumns; i < 5*numColumns; i += 4 { + dictUsage = max(dictUsage, AllocSize(bufSizes[i])) + dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i+2])) + } + for i := 5 * numColumns; i < len(bufSizes); i += 2 { + dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) } - return (dataPageUsage + dictUsage + readBufferUsage) * len(pp.columnNames), (dataPageUsage+dictUsage)*len(pp.columnNames) + readBufferUsageTotal + + pageUsage := (dataPageUsage + dictUsage) * numColumns + + return roundUp(pageUsage+readBufferUsageStream, defaultArenaSize), + roundUp(pageUsage+readBufferUsageNonStream, defaultArenaSize) 
} func (pp *ParquetParser) setStringData(readNum, col, offset int) { buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") + pp.rows[offset+i][col].SetBytesAsString(buf[i], "utf8mb4_bin", uint32(len(buf[i]))) } } @@ -764,6 +767,10 @@ func (pp *ParquetParser) Close() error { return errors.Trace(err) } } + + if a, ok := pp.alloc.(interface{ Close() }); ok { + a.Close() + } return nil } @@ -989,17 +996,6 @@ func (sa *sampleAllocator) Reallocate(size int, buf []byte) []byte { return make([]byte, size) } -func roundToPower2(n int) int { - v := uint(n) - v-- - v |= v >> 1 - v |= v >> 2 - v |= v >> 4 - v |= v >> 8 - v |= v >> 16 - return max(int(v+1), 256<<10) -} - // NewParquetParserWithMeta generates a parquet parser. func NewParquetParserWithMeta( ctx context.Context, diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go new file mode 100644 index 0000000000000..2a3165fe84808 --- /dev/null +++ b/pkg/lightning/mydump/simple_allocator.go @@ -0,0 +1,209 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mydump + +import ( + "math" +) + +const ( + alignSize = 64 << 10 + metaSize = 64 + invalid = math.MaxInt32 +) + +func simpleGetAllocationSize(size int) int { + return roundUp(size+metaSize, alignSize) * 2 +} + +func getSimpleAllocator(size int) arena { + a := &simpleAllocator{} + a.init(size) + return a +} + +func storeInt(value int, buf []byte) { + buf[0] = byte(value >> 24) + buf[1] = byte(value >> 16) + buf[2] = byte(value >> 8) + buf[3] = byte(value) +} + +func readInt(buf []byte) int { + return int(buf[0])<<24 | int(buf[1])<<16 | int(buf[2])<<8 | int(buf[3]) +} + +type simpleAllocator struct { + freeBuf map[int]int + buf []byte + base int + numAlloc int + firstFree int + alloc int +} + +func (sa *simpleAllocator) init(bufSize int) { + sa.buf = make([]byte, bufSize) + sa.base = int(addressOf(sa.buf)) + sa.reset() +} + +func (sa *simpleAllocator) getOffset(buf []byte) int { + return int(addressOf(buf)) - sa.base - metaSize +} + +func (sa *simpleAllocator) setBlk(offset, prev, next, blkSize int) { + if blkSize >= 0 { + storeInt(blkSize, sa.buf[offset:offset+4]) + } + if prev >= 0 { + storeInt(prev, sa.buf[offset+4:offset+8]) + } + if next >= 0 { + storeInt(next, sa.buf[offset+8:offset+12]) + } +} + +func (sa *simpleAllocator) getBlk(offset int) (prev, next, blkSize int) { + blkSize = readInt(sa.buf[offset : offset+4]) + prev = readInt(sa.buf[offset+4 : offset+8]) + next = readInt(sa.buf[offset+8 : offset+12]) + return +} + +func (sa *simpleAllocator) insertFree(free int) { + for offset := sa.firstFree; offset != invalid; { + if free > offset { + _, _, blkSize := sa.getBlk(free) + _, next, _ := sa.getBlk(offset) + sa.setBlk(offset, -1, free, -1) + sa.setBlk(free, offset, next, -1) + sa.setBlk(next, free, -1, -1) + sa.alloc -= blkSize + return + } + } + panic("Error insertFree") +} + +// Merge adjacent free blocks into one big free block +func (sa *simpleAllocator) merge() { + for offset := sa.firstFree; offset != invalid; { + _, next, blkSize := 
sa.getBlk(offset) + if offset+blkSize == next { + _, nextnext, nextBlkSize := sa.getBlk(next) + sa.setBlk(offset, -1, nextnext, blkSize+nextBlkSize) + sa.setBlk(nextnext, offset, -1, -1) + } else { + offset = next + } + } +} + +func (sa *simpleAllocator) allocate(size int) []byte { + sa.merge() + + allocSize := roundUp(size+metaSize, alignSize) + + bestOffset := -1 + minRemain := math.MaxInt32 + + for offset := sa.firstFree; offset != invalid; { + _, next, blkSize := sa.getBlk(offset) + if offset+blkSize >= len(sa.buf) { + panic("Error blk size") + } + if blkSize > allocSize && blkSize-allocSize < minRemain { + bestOffset = offset + minRemain = blkSize - allocSize + if minRemain == 0 { + break + } + } + offset = next + } + + if bestOffset == -1 { + return nil + } + + if minRemain == 0 { + prev, next, _ := sa.getBlk(bestOffset) + sa.setBlk(prev, -1, next, -1) + sa.setBlk(next, prev, -1, -1) + } else { + sa.setBlk(bestOffset, -1, -1, minRemain) + } + + sa.numAlloc++ + sa.alloc += allocSize + bufStart := bestOffset + minRemain + sa.setBlk(bufStart, -1, -1, allocSize) + sa.sanityCheck() + return sa.buf[bufStart+metaSize : bufStart+metaSize+size] +} + +func (sa *simpleAllocator) free(buf []byte) { + offset := sa.getOffset(buf) + if offset < 0 || offset > len(sa.buf) { + return + } + + sa.numAlloc-- + if sa.numAlloc == 0 { + sa.reset() + return + } + + sa.insertFree(offset) + sa.sanityCheck() +} + +func (sa *simpleAllocator) reallocate(buf []byte, size int) []byte { + sa.free(buf) + return sa.allocate(size) +} + +func (sa *simpleAllocator) allocated() int64 { + return int64(sa.numAlloc) +} + +func (sa *simpleAllocator) freeAll() { + sa.reset() +} + +func (sa *simpleAllocator) sanityCheck() { + mem := sa.alloc + for offset := sa.firstFree; offset != invalid; { + _, next, blkSize := sa.getBlk(offset) + mem += blkSize + offset = next + } + if mem != (len(sa.buf) - 3*alignSize) { + panic("sanity check failed") + } +} + +func (sa *simpleAllocator) reset() { + sa.freeBuf = 
make(map[int]int, 32) + sa.alloc = 0 + + // Add dummy head and tail + total := len(sa.buf) + sa.setBlk(0, invalid, alignSize, 0) + sa.setBlk(alignSize, 0, total-alignSize, total-alignSize*3) + sa.setBlk(total-alignSize, alignSize, invalid, 0) + sa.firstFree = 0 +} From 14c0e53dd229c34fed75ea30eb9510c53e490591 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 17 Jan 2025 18:00:31 +0800 Subject: [PATCH 34/93] Fix bazel --- DEPS.bzl | 13 ----- pkg/lightning/mydump/BUILD.bazel | 7 ++- pkg/lightning/mydump/allocator_test.go | 70 +++++++++++++++++++++++++ pkg/lightning/mydump/buddy_allocator.go | 5 ++ 4 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 pkg/lightning/mydump/allocator_test.go diff --git a/DEPS.bzl b/DEPS.bzl index c7882923b6788..1586f47124c16 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -7150,19 +7150,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tiancaiamao/gp/com_github_tiancaiamao_gp-v0.0.0-20221230034425-4025bc8a4d4a.zip", ], ) - go_repository( - name = "com_github_tidwall_btree", - build_file_proto_mode = "disable_global", - importpath = "github.com/tidwall/btree", - sha256 = "4a6619eb936c836841702933a9d66f27abe83b7ffb541de44d12db4aa3a809d5", - strip_prefix = "github.com/tidwall/btree@v1.7.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/tidwall/btree/com_github_tidwall_btree-v1.7.0.zip", - "http://ats.apps.svc/gomod/github.com/tidwall/btree/com_github_tidwall_btree-v1.7.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/tidwall/btree/com_github_tidwall_btree-v1.7.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/tidwall/btree/com_github_tidwall_btree-v1.7.0.zip", - ], - ) go_repository( name = "com_github_tidwall_gjson", build_file_proto_mode = "disable_global", diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 0e47ea007db29..55b0314971fe7 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ 
b/pkg/lightning/mydump/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "mydump", srcs = [ + "allocator.go", "buddy_allocator.go", "bytes.go", "charset_convertor.go", @@ -15,6 +16,7 @@ go_library( "region.go", "router.go", "schema_import.go", + "simple_allocator.go", ], importpath = "github.com/pingcap/tidb/pkg/lightning/mydump", visibility = ["//visibility:public"], @@ -34,6 +36,8 @@ go_library( "//pkg/types", "//pkg/util", "//pkg/util/filter", + "//pkg/util/intest", + "//pkg/util/mathutil", "//pkg/util/memory", "//pkg/util/regexpr-router", "//pkg/util/set", @@ -41,11 +45,11 @@ go_library( "//pkg/util/sqlescape", "//pkg/util/table-filter", "//pkg/util/zeropool", + "@com_github_go_sql_driver_mysql//:mysql", "@com_github_joechenrh_arrow_go_v18//arrow/memory", "@com_github_joechenrh_arrow_go_v18//parquet", "@com_github_joechenrh_arrow_go_v18//parquet/file", "@com_github_joechenrh_arrow_go_v18//parquet/schema", - "@com_github_go_sql_driver_mysql//:mysql", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_spkg_bom//:bom", @@ -62,6 +66,7 @@ go_test( name = "mydump_test", timeout = "short", srcs = [ + "allocator_test.go", "charset_convertor_test.go", "csv_parser_test.go", "loader_test.go", diff --git a/pkg/lightning/mydump/allocator_test.go b/pkg/lightning/mydump/allocator_test.go new file mode 100644 index 0000000000000..a7e0bc08a2c6a --- /dev/null +++ b/pkg/lightning/mydump/allocator_test.go @@ -0,0 +1,70 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mydump + +import ( + "context" + "math/rand" + "runtime" + "sync" + "testing" + "time" +) + +func TestSimpleAllocator(t *testing.T) { + alignSize = 1 << 10 + + totalSize := 1 << 20 + a := simpleAllocator{} + a.init(totalSize) + + var ( + lk sync.Mutex + wg sync.WaitGroup + ) + + allocSize := []int{1 << 10, 2 << 10, 4 << 10, 8 << 10, 16 << 10, 32 << 10, 64 << 10, 128 << 10} + + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + + allocFunc := func(ctx context.Context) { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + lk.Lock() + bufSize := allocSize[rand.Intn(len(allocSize))] + buf := a.allocate(bufSize) + lk.Unlock() + + time.Sleep(time.Millisecond) + + lk.Lock() + a.free(buf) + lk.Unlock() + } + } + } + + numCPU := runtime.NumCPU() + for i := 0; i < numCPU*2; i++ { + wg.Add(1) + go allocFunc(ctx) + } + wg.Wait() +} diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index 5ee79ff252bdd..b5e54e78d2956 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -19,6 +19,7 @@ import ( "sync" "sync/atomic" + "github.com/pingcap/tidb/pkg/util/intest" "github.com/pingcap/tidb/pkg/util/mathutil" ) @@ -435,6 +436,10 @@ func (b *buddyAllocator) init(totalSize int) { } func (b *buddyAllocator) sanityCheck() { + if !intest.InTest { + return + } + free := 0 for _, binfo := range b.bufInfo { blkSize := blkSize(binfo.l) From 0b76e09917255638f1c20f91bfffb478941e6239 Mon Sep 17 00:00:00 2001 
From: Ruihao Chen Date: Fri, 17 Jan 2025 18:48:44 +0800 Subject: [PATCH 35/93] Fix bazel --- lightning/pkg/importer/table_import.go | 2 +- pkg/lightning/mydump/allocator.go | 1 + pkg/lightning/mydump/buddy_allocator.go | 1 - pkg/lightning/mydump/parquet_parser.go | 21 +++++++++++++-------- pkg/lightning/mydump/simple_allocator.go | 14 +++++++++++--- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 9cee3fe4f960f..32ab3d8d50eda 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -109,7 +109,7 @@ func NewTableImporter( logger log.Logger, ) (*TableImporter, error) { idAlloc := kv.NewPanickingAllocatorsWithBase(tableInfo.Core.SepAutoInc(), cp.AutoRandBase, cp.AutoIncrBase, cp.AutoRowIDBase) - tbl, err := tables.TableLightningFromMeta(idAlloc, tableInfo.Core) + tbl, err := tables.TableFromMeta(idAlloc, tableInfo.Core) if err != nil { return nil, errors.Annotatef(err, "failed to tables.TableFromMeta %s", tableName) } diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 4df793cf6403e..9be41906c3298 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -97,6 +97,7 @@ func (ga *globalArenaPool) adjustGCPercent() { percent = max(percent, 5) old := debug.SetGCPercent(percent) + //nolint: all_revive,revive runtime.GC() log.L().Debug("set gc percentage", zap.Int("old", old), diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go index b5e54e78d2956..370583f0416da 100644 --- a/pkg/lightning/mydump/buddy_allocator.go +++ b/pkg/lightning/mydump/buddy_allocator.go @@ -350,7 +350,6 @@ func (b *buddyAllocator) initFreePair(l, bi int) (free int) { } else { b.bufInfo[l].push(blkAddr(l, bi)) } - } return } diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 959e56a8a08ea..6949a694cbf6c 
100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -19,6 +19,7 @@ import ( "context" "fmt" "io" + "math" "math/big" "strings" "sync/atomic" @@ -363,17 +364,19 @@ type ParquetParser struct { logger log.Logger } -// GetMemoryUage estimate the memory usage for this file. -func (pp *ParquetParser) GetMemoryUsage() (int, int) { +// GetMemoryUsage estimate the memory usage for this file. +func (pp *ParquetParser) GetMemoryUsage() (memoryUsageStream, memoryUsageNonStream int) { // Initialize column reader if pp.dumpers[0].reader == nil { - pp.ReadRow() + if err := pp.ReadRow(); err != nil { + return math.MaxInt, math.MaxInt + } } // All the columns share the same data page size, // so we only need to read one column chunk. dumper := pp.dumpers[0] - for true { + for { read := dumper.readNextBatch(defaultBatchSize) if read == 0 { break @@ -447,7 +450,7 @@ func (pp *ParquetParser) setUint32Data(readNum, col, offset int) { func (pp *ParquetParser) setInt64Data(readNum, col, offset int) { buf, _ := pp.dumpers[col].valueBuffer.([]int64) for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetInt64(int64(buf[i])) + pp.rows[offset+i][col].SetInt64(buf[i]) } } @@ -975,7 +978,9 @@ func NewParquetParser( alloc: allocator, logger: log.FromContext(ctx), } - parser.Init() + if err := parser.Init(); err != nil { + return nil, errors.Trace(err) + } return parser, nil } @@ -989,9 +994,9 @@ func (sa *sampleAllocator) Allocate(size int) []byte { return make([]byte, size) } -func (sa *sampleAllocator) Free(buf []byte) {} +func (_ *sampleAllocator) Free(_ []byte) {} -func (sa *sampleAllocator) Reallocate(size int, buf []byte) []byte { +func (sa *sampleAllocator) Reallocate(size int, _ []byte) []byte { sa.allocated = append(sa.allocated, size) return make([]byte, size) } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 2a3165fe84808..8d9db13ef9b9d 100644 --- 
a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -16,14 +16,18 @@ package mydump import ( "math" + + "github.com/pingcap/tidb/pkg/util/intest" ) const ( - alignSize = 64 << 10 - metaSize = 64 - invalid = math.MaxInt32 + metaSize = 64 + invalid = math.MaxInt32 ) +// This value will be modifed in test +var alignSize = 64 << 10 + func simpleGetAllocationSize(size int) int { return roundUp(size+metaSize, alignSize) * 2 } @@ -185,6 +189,10 @@ func (sa *simpleAllocator) freeAll() { } func (sa *simpleAllocator) sanityCheck() { + if !intest.InTest { + return + } + mem := sa.alloc for offset := sa.firstFree; offset != invalid; { _, next, blkSize := sa.getBlk(offset) From 155ed91c8adde3abdf8268b2f5f8d180a234bab4 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 17 Jan 2025 18:58:23 +0800 Subject: [PATCH 36/93] Fix --- pkg/lightning/mydump/parquet_parser.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 6949a694cbf6c..318f2cba24682 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -994,7 +994,7 @@ func (sa *sampleAllocator) Allocate(size int) []byte { return make([]byte, size) } -func (_ *sampleAllocator) Free(_ []byte) {} +func (*sampleAllocator) Free([]byte) {} func (sa *sampleAllocator) Reallocate(size int, _ []byte) []byte { sa.allocated = append(sa.allocated, size) @@ -1080,7 +1080,9 @@ func NewParquetParserWithMeta( alloc: allocator, logger: log.FromContext(ctx), } - parser.Init() + if err := parser.Init(); err != nil { + return nil, errors.Trace(err) + } return parser, nil } From f98e27202ed5575b2878a422223ae5251468126d Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 21 Jan 2025 13:56:57 +0800 Subject: [PATCH 37/93] Fix allocator --- pkg/lightning/mydump/allocator.go | 8 ++------ pkg/lightning/mydump/simple_allocator.go | 10 ++-------- 2 files changed, 4 
insertions(+), 14 deletions(-) diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 9be41906c3298..3615663fa1dd0 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -71,11 +71,10 @@ func GetArenaSize() int { // arena is the interface of single allocator type arena interface { - allocate(size int) []byte - free(bs []byte) + allocate(int) []byte + free([]byte) allocated() int64 reset() - freeAll() } type globalArenaPool struct { @@ -137,7 +136,6 @@ func (ga *globalArenaPool) put(a arena) { // discard it if necessary if ga.allocated > maxArenaCount { - a.freeAll() a.reset() ga.adjustGCPercent() return @@ -153,7 +151,6 @@ func (ga *globalArenaPool) free() { ga.allocated = 0 for len(ga.arenas) > 0 { a := <-ga.arenas - a.freeAll() a.reset() } ga.adjustGCPercent() @@ -249,7 +246,6 @@ func (alloc *defaultAllocator) Close() { // If global pool is initialized, return allocated arena to the pool. if globalPool != nil { for _, a := range alloc.arenas { - a.freeAll() a.reset() globalPool.put(a) } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 8d9db13ef9b9d..3ba06b93f8bd2 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -26,7 +26,7 @@ const ( ) // This value will be modifed in test -var alignSize = 64 << 10 +var alignSize = 16 << 10 func simpleGetAllocationSize(size int) int { return roundUp(size+metaSize, alignSize) * 2 @@ -50,7 +50,6 @@ func readInt(buf []byte) int { } type simpleAllocator struct { - freeBuf map[int]int buf []byte base int numAlloc int @@ -129,7 +128,7 @@ func (sa *simpleAllocator) allocate(size int) []byte { if offset+blkSize >= len(sa.buf) { panic("Error blk size") } - if blkSize > allocSize && blkSize-allocSize < minRemain { + if blkSize >= allocSize && blkSize-allocSize < minRemain { bestOffset = offset minRemain = blkSize - allocSize if minRemain == 0 { @@ -184,10 +183,6 @@ 
func (sa *simpleAllocator) allocated() int64 { return int64(sa.numAlloc) } -func (sa *simpleAllocator) freeAll() { - sa.reset() -} - func (sa *simpleAllocator) sanityCheck() { if !intest.InTest { return @@ -205,7 +200,6 @@ func (sa *simpleAllocator) sanityCheck() { } func (sa *simpleAllocator) reset() { - sa.freeBuf = make(map[int]int, 32) sa.alloc = 0 // Add dummy head and tail From e188b2ebf1b0986e1379d5b8ed7a78c69dbc7a25 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 5 Feb 2025 10:52:17 +0800 Subject: [PATCH 38/93] Remove some test codes --- lightning/cmd/tidb-lightning/main.go | 52 +-- pkg/lightning/mydump/BUILD.bazel | 2 - pkg/lightning/mydump/allocator.go | 93 ++--- pkg/lightning/mydump/buddy_allocator.go | 463 ----------------------- pkg/lightning/mydump/simple_allocator.go | 4 + 5 files changed, 33 insertions(+), 581 deletions(-) delete mode 100644 pkg/lightning/mydump/buddy_allocator.go diff --git a/lightning/cmd/tidb-lightning/main.go b/lightning/cmd/tidb-lightning/main.go index 17535171e853d..d68d6033acc4f 100644 --- a/lightning/cmd/tidb-lightning/main.go +++ b/lightning/cmd/tidb-lightning/main.go @@ -19,11 +19,8 @@ import ( "fmt" "os" "os/signal" - "runtime" "runtime/debug" - "runtime/pprof" "syscall" - "time" "github.com/pingcap/tidb/lightning/pkg/server" "github.com/pingcap/tidb/lightning/pkg/web" @@ -32,56 +29,9 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" - - "net/http" - _ "net/http/pprof" ) -func bToMb(b uint64) uint64 { - return b / (1024 * 1024) -} - -func TrackSysMemUsage(ctx context.Context) { - tick := time.NewTicker(time.Second) - for { - select { - case <-ctx.Done(): - return - case <-tick.C: - var m runtime.MemStats - runtime.ReadMemStats(&m) - - fmt.Printf("HeapInUse = %v MiB, limit = %d MiB, canReturn = %dMiB\n", - bToMb(m.HeapInuse), bToMb(m.Sys-m.HeapReleased), bToMb(m.HeapIdle-m.HeapReleased)) - } - } -} - func main() { - go func() { - 
http.ListenAndServe("0.0.0.0:8899", nil) - }() - - // Create a memory profile file - f, err := os.Create("mem.pprof") - if err != nil { - fmt.Println("Failed to create memory profile file:", err) - return - } - defer f.Close() - - // Start the memory profile - if err := pprof.StartCPUProfile(f); err != nil { - fmt.Println("Failed to start memory profile:", err) - return - } - defer pprof.StopCPUProfile() - - // Track heap in use - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go TrackSysMemUsage(ctx) - globalCfg := config.Must(config.LoadGlobalConfig(os.Args[1:], nil)) logToFile := globalCfg.App.File != "" && globalCfg.App.File != "-" if logToFile { @@ -125,7 +75,7 @@ func main() { } } - err = app.GoServe() + err := app.GoServe() if err != nil { logger.Error("failed to start HTTP server", zap.Error(err)) fmt.Fprintln(os.Stderr, "failed to start HTTP server:", err) diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 55b0314971fe7..27a5e9235340e 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -4,7 +4,6 @@ go_library( name = "mydump", srcs = [ "allocator.go", - "buddy_allocator.go", "bytes.go", "charset_convertor.go", "csv_parser.go", @@ -37,7 +36,6 @@ go_library( "//pkg/util", "//pkg/util/filter", "//pkg/util/intest", - "//pkg/util/mathutil", "//pkg/util/memory", "//pkg/util/regexpr-router", "//pkg/util/set", diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 3615663fa1dd0..072aed70b8db9 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -15,14 +15,11 @@ package mydump import ( - "context" - "fmt" "os" "runtime" "runtime/debug" "sync" "sync/atomic" - "time" "unsafe" "github.com/joechenrh/arrow-go/v18/arrow/memory" @@ -77,22 +74,22 @@ type arena interface { reset() } -type globalArenaPool struct { +type arenaPool struct { arenas chan arena allocated int lock sync.Mutex } -func (ga 
*globalArenaPool) adjustGCPercent() { +func (ap *arenaPool) adjustGCPercent() { gogc := os.Getenv("GOGC") memTotal, err := tidbmemory.MemTotal() if gogc == "" && err == nil { - if ga.allocated == 0 { + if ap.allocated == 0 { debug.SetGCPercent(100) return } - percent := int(memTotal)*90/(ga.allocated*defaultArenaSize) - 100 - percent = min(percent, 100) / 10 * 10 + percent := int(memTotal)*90/(ap.allocated*defaultArenaSize) - 100 + percent = min(percent, 50) / 10 * 10 percent = max(percent, 5) old := debug.SetGCPercent(percent) @@ -102,27 +99,27 @@ func (ga *globalArenaPool) adjustGCPercent() { zap.Int("old", old), zap.Int("new", percent), zap.Int("total memory", int(memTotal)), - zap.Int("allocated memory", ga.allocated*defaultArenaSize), + zap.Int("allocated memory", ap.allocated*defaultArenaSize), ) } } -func (ga *globalArenaPool) get() arena { +func (ap *arenaPool) get() arena { // First try to get cached arena select { - case a := <-ga.arenas: + case a := <-ap.arenas: return a default: } - ga.lock.Lock() - defer ga.lock.Unlock() + ap.lock.Lock() + defer ap.lock.Unlock() // Create a new one and return - if ga.allocated < maxArenaCount { - ga.allocated++ + if ap.allocated < maxArenaCount { + ap.allocated++ bd := GetArena(defaultArenaSize) - ga.adjustGCPercent() + ap.adjustGCPercent() return bd } @@ -130,33 +127,33 @@ func (ga *globalArenaPool) get() arena { return nil } -func (ga *globalArenaPool) put(a arena) { - ga.lock.Lock() - defer ga.lock.Unlock() +func (ap *arenaPool) put(a arena) { + ap.lock.Lock() + defer ap.lock.Unlock() // discard it if necessary - if ga.allocated > maxArenaCount { + if ap.allocated > maxArenaCount { a.reset() - ga.adjustGCPercent() + ap.adjustGCPercent() return } - ga.arenas <- a + ap.arenas <- a } -func (ga *globalArenaPool) free() { - ga.lock.Lock() - defer ga.lock.Unlock() +func (ap *arenaPool) free() { + ap.lock.Lock() + defer ap.lock.Unlock() - ga.allocated = 0 - for len(ga.arenas) > 0 { - a := <-ga.arenas + ap.allocated = 0 + 
for len(ap.arenas) > 0 { + a := <-ap.arenas a.reset() } - ga.adjustGCPercent() + ap.adjustGCPercent() } -var globalPool *globalArenaPool +var globalPool *arenaPool type defaultAllocator struct { arenas []arena @@ -164,41 +161,10 @@ type defaultAllocator struct { allocatedOutside atomic.Int64 allocatedOutsideNum atomic.Int64 - - cancel context.CancelFunc - wg sync.WaitGroup } func (alloc *defaultAllocator) init() { alloc.allocatedBuf = make(map[uintptr]int, 8) - ctx, cancel := context.WithCancel(context.Background()) - alloc.wg.Add(1) - go func() { - defer alloc.wg.Done() - tick := time.NewTicker(2 * time.Second) - defer tick.Stop() - for { - select { - case <-tick.C: - var m runtime.MemStats - runtime.ReadMemStats(&m) - - num := 0 - for _, a := range alloc.arenas { - num += int(a.allocated()) - } - - fmt.Printf("[Allocator] num arenas = %d, num blocks = %d, outside the allocator: %d MiB(%d blocks)\n", - len(alloc.arenas), num, - int(alloc.allocatedOutsideNum.Load()), - int(alloc.allocatedOutside.Load())/1024/1024, - ) - case <-ctx.Done(): - return - } - } - }() - alloc.cancel = cancel } func (alloc *defaultAllocator) Allocate(size int) []byte { @@ -240,9 +206,6 @@ func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { } func (alloc *defaultAllocator) Close() { - alloc.cancel() - alloc.wg.Wait() - // If global pool is initialized, return allocated arena to the pool. if globalPool != nil { for _, a := range alloc.arenas { @@ -265,7 +228,7 @@ func GetDefaultAllocator() memory.Allocator { // If you call this function, remember to call FreeMemory. 
func InitializeGlobalArena(size int) { maxArenaCount = size / defaultArenaSize - globalPool = &globalArenaPool{} + globalPool = &arenaPool{} globalPool.arenas = make(chan arena, maxArenaCount) } diff --git a/pkg/lightning/mydump/buddy_allocator.go b/pkg/lightning/mydump/buddy_allocator.go deleted file mode 100644 index 370583f0416da..0000000000000 --- a/pkg/lightning/mydump/buddy_allocator.go +++ /dev/null @@ -1,463 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package mydump - -import ( - "fmt" - "sync" - "sync/atomic" - - "github.com/pingcap/tidb/pkg/util/intest" - "github.com/pingcap/tidb/pkg/util/mathutil" -) - -const leafSize = 256 << 10 // The smallest block size is 256KB - -func buddyGetAllocationSize(size int) int { - return int(mathutil.NextPowerOfTwo(int64(size))) -} - -func getBuddyAllocator(size int) arena { - a := &buddyAllocator{} - a.init(size) - return a -} - -func roundUp(n, sz int) int { - return (n + sz - 1) / sz * sz -} - -// Compute block size at layer l -func blkSize(l int) int { - return (1 << l) * leafSize -} - -// Compute the block index for offset at layer l -func blkIndex(l, offset int) int { - return offset / blkSize(l) -} - -// Compute the first block index at layer l after offset -func blkIndexNext(l, offset int) int { - blkSize := blkSize(l) - bi := offset / blkSize - if offset%blkSize != 0 { - bi++ - } - return bi -} - -// Convert a block index at layer l back into an offset -func blkAddr(l, bi int) int { - return bi * blkSize(l) -} - -// Return 1 if bit at position index in array is set to 1 -func bitIsSet(arr []byte, index int) bool { - b := int(arr[index/8]) - m := (1 << (index % 8)) - return (b & m) == m -} - -// Set bit at position index in array to 1 -func bitSet(arr []byte, index int) { - b := int(arr[index/8]) - m := (1 << (index % 8)) - arr[index/8] = byte(b | m) -} - -// Clear bit at position index in array -func bitClear(arr []byte, index int) { - b := int(arr[index/8]) - m := (1 << (index % 8)) - arr[index/8] = byte(b & ^m) -} - -// Return the first layer whose block size is larger than n -func firstLayer(n int) int { - l := 0 - for size := leafSize; size < n; size *= 2 { - l++ - } - return l -} - -// The allocator has bufferInfo for each size k. Each bufferInfo has a free -// list, an array alloc to keep track which blocks have been -// allocated, and an split array to to keep track which blocks have -// been split. 
The arrays are of type char (which is 1 byte), but the -// allocator uses 1 bit per block (thus, one char records the info of -// 8 blocks). -type bufferInfo struct { - alloc []byte - split []byte - canAllocate []byte - - l int - nblk int - freeCnt int -} - -func (binfo *bufferInfo) init(nblk, l int) { - sz := roundUp(nblk, 8) / 8 - binfo.canAllocate = make([]byte, nblk) - binfo.alloc = make([]byte, sz) - binfo.split = make([]byte, sz) - binfo.l = l -} - -// Remove buffer at offset in this layer as non-allocatable. -func (binfo *bufferInfo) remove(offset int) { - binfo.freeCnt-- - bitClear(binfo.canAllocate, blkIndex(binfo.l, offset)) -} - -// Check whether there are available buffer in this layer. -func (binfo *bufferInfo) empty() bool { - return binfo.freeCnt == 0 -} - -// Add buffer at offset in this layer as allocatable -func (binfo *bufferInfo) push(offset int) { - binfo.freeCnt++ - bitSet(binfo.canAllocate, blkIndex(binfo.l, offset)) -} - -// Get one free buffer in this layer -func (binfo *bufferInfo) pop() int { - for bi := 0; bi < binfo.nblk; bi++ { - if bitIsSet(binfo.canAllocate, bi) { - bitClear(binfo.canAllocate, bi) - binfo.freeCnt-- - return blkAddr(binfo.l, bi) - } - } - return -1 -} - -// buffer is represented as an offset. 
-type buddyAllocator struct { - buffer []byte - bufInfo []bufferInfo - nLayers int - maxLayer int - - allocatedBuf map[uintptr]int - - allocatedBytes atomic.Int64 - unavailable int - total int - - lock sync.Mutex -} - -// Find the layer of the block at offset -func (b *buddyAllocator) layer(offset int) int { - for k := 0; k < b.maxLayer; k++ { - if bitIsSet(b.bufInfo[k+1].split, blkIndex(k+1, offset)) { - return k - } - } - return b.maxLayer -} - -// Allocate nbytes, but malloc won't return anything smaller than LeafSize -func (b *buddyAllocator) allocate(nbytes int) []byte { - b.lock.Lock() - defer b.lock.Unlock() - - // Find a free block >= nbytes, starting with lowest layer possible - fl := firstLayer(nbytes) - l := fl - for ; l < b.nLayers; l++ { - if !b.bufInfo[l].empty() { - break - } - } - - // No free blocks, allocation failed - if l == b.nLayers { - return nil - } - - // Found a block, pop it and potentially split it. - offset := b.bufInfo[l].pop() - bitSet(b.bufInfo[l].alloc, blkIndex(l, offset)) - for ; l > fl; l-- { - // Get the buddy buffer - qa := offset + blkSize(l-1) - // Split the block at layer l, mark it as splited. - // Mark half of the block at l - 1 as allocated, - // and put it into the free list at layer l-1. 
- bitSet(b.bufInfo[l].split, blkIndex(l, offset)) - bitSet(b.bufInfo[l-1].alloc, blkIndex(l-1, offset)) - b.bufInfo[l-1].push(qa) - } - - buf := b.buffer[offset : offset+nbytes] - b.allocatedBytes.Add(int64(blkSize(l))) - addr := addressOf(buf) - if off, ok := b.allocatedBuf[addr]; ok { - fmt.Println("duplicated allocation", addr, offset, off) - panic("duplicated allocation") - } - b.allocatedBuf[addr] = offset - - b.sanityCheck() - return buf -} - -// free memory marked by p, which was earlier allocated using Malloc -func (b *buddyAllocator) free(bs []byte) { - b.lock.Lock() - defer b.lock.Unlock() - - if len(bs) == 0 { - bs = bs[:1] - } - - addr := addressOf(bs) - offset, ok := b.allocatedBuf[addr] - if !ok { - return - } - - l := b.layer(offset) - - b.allocatedBytes.Add(-int64(blkSize(l))) - delete(b.allocatedBuf, addr) - - // Start merge from layer l - for ; l < b.maxLayer; l++ { - // Find the buddy index at layer l - bi := blkIndex(l, offset) - buddy := bi + 1 - if bi%2 != 0 { - buddy = bi - 1 - } - - // Free p at layer l - bitClear(b.bufInfo[l].alloc, bi) - - // If buddy is allocated, break the merge - if bitIsSet(b.bufInfo[l].alloc, buddy) { - break - } - - // Buddy is free, merge with buddy and remove it from free list - buddyOffset := blkAddr(l, buddy) - b.bufInfo[l].remove(buddyOffset) - - // Update offset to the merged buffer at layer l+1 - if buddy%2 == 0 { - offset = buddyOffset - } - - // At layer l+1, mark that the merged buddy pair isn't split anymore - bitClear(b.bufInfo[l+1].split, blkIndex(l+1, offset)) - } - - // Add the final merged buffer to free list. 
- b.bufInfo[l].push(offset) - - b.sanityCheck() -} - -func (b *buddyAllocator) reset() { - for i := range b.bufInfo { - b.bufInfo[i].alloc = nil - b.bufInfo[i].split = nil - b.bufInfo[i].canAllocate = nil - } - b.bufInfo = nil - b.buffer = nil -} - -func (b *buddyAllocator) freeAll() { - if len(b.allocatedBuf) != 0 { - fmt.Println("allocatedBuf", len(b.allocatedBuf)) - } - - for _, offset := range b.allocatedBuf { - b.free(b.buffer[offset:]) - } - - if len(b.allocatedBuf) != 0 || b.allocatedBytes.Load() != 0 { - panic("freeAll error") - } - - b.sanityCheck() -} - -func (b *buddyAllocator) allocated() int64 { - return b.allocatedBytes.Load() -} - -/* - * Mark memory from [start, end), starting at layer 0, as allocated. - * - * start(leftbi) end rightBi - * | | | - * |--------|---------|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|--------|--------| - */ -func (b *buddyAllocator) markAllocated(start, end int) { - for k := 0; k < b.nLayers; k++ { - leftBi := blkIndex(k, start) - rightBi := blkIndexNext(k, end) - for bi := leftBi; bi < rightBi; bi++ { - // if a block is allocated at size k, mark it as split too. - bitSet(b.bufInfo[k].split, bi) - bitSet(b.bufInfo[k].alloc, bi) - } - } -} - -// Mark the range outside [start, end) as allocated -func (b *buddyAllocator) markUnavailable(start, end int) int { - heapSize := blkSize(b.maxLayer) - unavailableEnd := roundUp(heapSize-end, leafSize) - unavailableStart := roundUp(start, leafSize) - b.markAllocated(0, unavailableStart) - b.markAllocated(heapSize-unavailableEnd, heapSize) - return unavailableEnd + unavailableStart -} - -// If a block is marked as allocated and its buddy is free, put the -// buddy on the free list at layer l. 
-func (b *buddyAllocator) initFreePair(l, bi int) (free int) { - buddy := bi + 1 - if bi%2 == 1 { - buddy = bi - 1 - } - - // one of the pair is free - if bitIsSet(b.bufInfo[l].alloc, bi) != bitIsSet(b.bufInfo[l].alloc, buddy) { - free = blkSize(l) - if bitIsSet(b.bufInfo[l].alloc, bi) { - b.bufInfo[l].push(blkAddr(l, buddy)) - } else { - b.bufInfo[l].push(blkAddr(l, bi)) - } - } - return -} - -/* - * Initialize the free lists for each layer l. For each layer l, there - * are only two pairs that may have a buddy that should be on free list. - * - * start leftBi rightBi end - * | | | | - * |xxxxxxxx|xxxxxxxx|x-------|--------|--------|------xx|xxxxxxxx|xxxxxxxx| - */ -func (b *buddyAllocator) initFree(left, right int) int { - free := 0 - - for l := 0; l < b.maxLayer; l++ { - nblk := 1 << (b.maxLayer - l) - leftBi := blkIndexNext(l, left) - rightBi := blkIndex(l, right) - - if leftBi < nblk { - free += b.initFreePair(l, leftBi) - } - if rightBi > leftBi && (leftBi/2 != rightBi/2) && rightBi < nblk { - free += b.initFreePair(l, rightBi) - } - } - - return free -} - -// Initialize the buddy allocator, assert totalSize is the power of 2. -func (b *buddyAllocator) init(totalSize int) { - log2 := func(n int) int { - k := 0 - for n > 1 { - k++ - n = n >> 1 - } - return k - } - - // compute the number of sizes we need to manage totalSize - b.buffer = make([]byte, totalSize) - b.nLayers = log2(totalSize/leafSize) + 1 - if totalSize > blkSize(b.nLayers-1) { - b.nLayers++ // round up to the next power of 2 - } - b.maxLayer = b.nLayers - 1 - b.bufInfo = make([]bufferInfo, b.nLayers) - - // Initialize free list and allocate the alloc array for each size l. - // Also allocate the split array for each size l, l = 0 is not used. - // since we will not split blocks of size l = 0, the smallest size. 
- markedCount := 0 - for l := 0; l < b.nLayers; l++ { - nblk := 1 << (b.maxLayer - l) - sz := roundUp(nblk, 8) / 8 - b.bufInfo[l].canAllocate = b.buffer[markedCount : markedCount+sz] - markedCount += sz - b.bufInfo[l].alloc = b.buffer[markedCount : markedCount+sz] - markedCount += sz - b.bufInfo[l].split = b.buffer[markedCount : markedCount+sz] - markedCount += sz - b.bufInfo[l].l = l - b.bufInfo[l].nblk = nblk - } - - // Mark the memory in range [0, markedCount) and [totalSize, HeapSize) as allocated, - // where HeapSize = blkSize(maxLayer) - unavailable := b.markUnavailable(markedCount, totalSize) - // initialize free lists for each size k - free := b.initFree(0, blkSize(b.maxLayer)-unavailable) - b.unavailable = unavailable - b.total = blkSize(b.maxLayer) - - // check if the amount that is free is what we expect - if free != blkSize(b.maxLayer)-unavailable { - panic("Initialize allocator failed") - } - - b.allocatedBuf = make(map[uintptr]int, totalSize/leafSize) -} - -func (b *buddyAllocator) sanityCheck() { - if !intest.InTest { - return - } - - free := 0 - for _, binfo := range b.bufInfo { - blkSize := blkSize(binfo.l) - for bi := 0; bi < binfo.nblk; bi++ { - if bitIsSet(binfo.canAllocate, bi) { - free += blkSize - } - } - } - - alloc := 0 - for _, offset := range b.allocatedBuf { - alloc += blkSize(b.layer(offset)) - } - if alloc != int(b.allocatedBytes.Load()) { - panic("Sanity check failed") - } - - if free+int(b.allocatedBytes.Load())+b.unavailable != b.total { - panic("Sanity check failed") - } -} diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 3ba06b93f8bd2..4a4bc12d0a133 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -28,6 +28,10 @@ const ( // This value will be modifed in test var alignSize = 16 << 10 +func roundUp(n, sz int) int { + return (n + sz - 1) / sz * sz +} + func simpleGetAllocationSize(size int) int { return roundUp(size+metaSize, 
alignSize) * 2 } From 9206b62915ae5bee0578dddbbde3a65256f6a15a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 5 Feb 2025 11:48:40 +0800 Subject: [PATCH 39/93] Fix test --- pkg/lightning/mydump/parquet_parser.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 318f2cba24682..70a19410e300f 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -512,9 +512,11 @@ func (pp *ParquetParser) setDecimalData(readNum, col, offset int) error { for i := 0; i < readNum; i++ { if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { - v := int64buf[i] + var v int64 if colTp == parquet.Types.Int32 { v = int64(int32buf[i]) + } else { + v = int64buf[i] } if !decimal.IsSet || decimal.Scale == 0 { pp.rows[offset+i][col].SetInt64(v) From bcbbf41d82a55ae2d927eed01ba2fcda967a6b1a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 5 Feb 2025 16:34:35 +0800 Subject: [PATCH 40/93] Fix test --- pkg/lightning/mydump/parquet_parser.go | 371 +++++++++++++------------ 1 file changed, 197 insertions(+), 174 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 70a19410e300f..331729f275746 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -21,6 +21,7 @@ import ( "io" "math" "math/big" + "reflect" "strings" "sync/atomic" "time" @@ -36,7 +37,7 @@ import ( ) const ( - defaultBatchSize = 128 + defaultBatchSize = 16 // if a parquet if small than this threshold, parquet will load the whole file in a byte slice to // optimize the read performance @@ -80,6 +81,7 @@ type columnDumper struct { levelsBuffered int64 defLevels []int16 repLevels []int16 + values []interface{} valueBuffer any } @@ -111,46 +113,49 @@ func createcolumnDumper(tp parquet.Type) *columnDumper { batchSize: int64(batchSize), defLevels: make([]int16, batchSize), 
repLevels: make([]int16, batchSize), + values: make([]interface{}, batchSize), valueBuffer: valueBuffer, } } +// Type returns the column type of this dumper func (dump *columnDumper) Type() parquet.Type { return dump.reader.Type() } +// SetReader sets the reader func (dump *columnDumper) SetReader(colReader file.ColumnChunkReader) { dump.reader = colReader dump.valueOffset = 0 dump.levelOffset = 0 } -func (dump *columnDumper) readNextBatch(req int64) int { +func (dump *columnDumper) readNextBatch() int { switch reader := dump.reader.(type) { case *file.BooleanColumnChunkReader: values, _ := dump.valueBuffer.([]bool) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.Int32ColumnChunkReader: values, _ := dump.valueBuffer.([]int32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.Int64ColumnChunkReader: values, _ := dump.valueBuffer.([]int64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.Float32ColumnChunkReader: values, _ := dump.valueBuffer.([]float32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.Float64ColumnChunkReader: values, _ := dump.valueBuffer.([]float64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, 
dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.Int96ColumnChunkReader: values, _ := dump.valueBuffer.([]parquet.Int96) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.ByteArrayColumnChunkReader: values, _ := dump.valueBuffer.([]parquet.ByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) case *file.FixedLenByteArrayColumnChunkReader: values, _ := dump.valueBuffer.([]parquet.FixedLenByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(req, values, dump.defLevels, dump.repLevels) + dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) } dump.valueOffset = 0 @@ -158,6 +163,37 @@ func (dump *columnDumper) readNextBatch(req int64) int { return int(dump.levelsBuffered) } +func (dump *columnDumper) hasNext() bool { + return dump.levelOffset < dump.levelsBuffered || dump.reader.HasNext() +} + +// Next reads next value from the reader +func (dump *columnDumper) Next() (interface{}, bool) { + if dump.levelOffset == dump.levelsBuffered { + if !dump.hasNext() { + return nil, false + } + dump.readNextBatch() + if dump.levelsBuffered == 0 { + return nil, false + } + } + + defLevel := dump.defLevels[int(dump.levelOffset)] + // repLevel := dump.repLevels[int(dump.levelOffset)] + dump.levelOffset++ + + if defLevel < dump.reader.Descriptor().MaxDefinitionLevel() { + return nil, true + } + + vb := reflect.ValueOf(dump.valueBuffer) + v := vb.Index(dump.valueOffset).Interface() + dump.valueOffset++ + + return v, true +} + // convertedType is older representation 
of the logical type in parquet // ref: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md type convertedType struct { @@ -377,8 +413,7 @@ func (pp *ParquetParser) GetMemoryUsage() (memoryUsageStream, memoryUsageNonStre // so we only need to read one column chunk. dumper := pp.dumpers[0] for { - read := dumper.readNextBatch(defaultBatchSize) - if read == 0 { + if _, ok := dumper.Next(); !ok { break } } @@ -426,160 +461,125 @@ func (pp *ParquetParser) GetMemoryUsage() (memoryUsageStream, memoryUsageNonStre roundUp(pageUsage+readBufferUsageNonStream, defaultArenaSize) } -func (pp *ParquetParser) setStringData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetBytesAsString(buf[i], "utf8mb4_bin", uint32(len(buf[i]))) - } +func (pp *ParquetParser) setStringData(row, col int, val interface{}) { + vba, _ := val.(parquet.ByteArray) + pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") } -func (pp *ParquetParser) setInt32Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int32) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetInt64(int64(buf[i])) - } +func (pp *ParquetParser) setInt32Data(row, col int, val interface{}) { + v32, _ := val.(int32) + pp.rows[row][col].SetInt64(int64(v32)) } -func (pp *ParquetParser) setUint32Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int64) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetUint64(uint64(buf[i])) - } +func (pp *ParquetParser) setUint32Data(row, col int, val interface{}) { + v64, _ := val.(int64) + pp.rows[row][col].SetUint64(uint64(v64)) } -func (pp *ParquetParser) setInt64Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int64) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetInt64(buf[i]) - } +func (pp *ParquetParser) setInt64Data(row, col int, val interface{}) { + v64, _ := val.(int64) + 
pp.rows[row][col].SetInt64(v64) } -func (pp *ParquetParser) setUint64Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int64) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetUint64(uint64(buf[i])) - } +func (pp *ParquetParser) setUint64Data(row, col int, val interface{}) { + v64, _ := val.(int64) + pp.rows[row][col].SetUint64(uint64(v64)) } -func (pp *ParquetParser) setTimeMillisData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int32) - for i := 0; i < readNum; i++ { - timeStr := formatTime(int64(buf[i]), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) - pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") - } +func (pp *ParquetParser) setTimeMillisData(row, col int, val interface{}) { + v32, _ := val.(int32) + timeStr := formatTime(int64(v32), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) + pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimeMicrosData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int32) - for i := 0; i < readNum; i++ { - timeStr := formatTime(int64(buf[i]), "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) - pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") - } +func (pp *ParquetParser) setTimeMicrosData(row, col int, val interface{}) { + v64, _ := val.(int64) + timeStr := formatTime(v64, "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) + pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimestampMillisData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int64) - for i := 0; i < readNum; i++ { - timeStr := formatTime(buf[i], "MILLIS", timeLayout, utcTimeLayout, true) - pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") - } +func (pp *ParquetParser) setTimestampMillisData(row, col int, val interface{}) { + v64, _ := val.(int64) + timeStr := formatTime(v64, "MILLIS", timeLayout, utcTimeLayout, true) + 
pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimestampMicrosData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int64) - for i := 0; i < readNum; i++ { - timeStr := formatTime(buf[i], "MICROS", timeLayout, utcTimeLayout, true) - pp.rows[offset+i][col].SetString(timeStr, "utf8mb4_bin") - } +func (pp *ParquetParser) setTimestampMicrosData(row, col int, val interface{}) { + v64, _ := val.(int64) + timeStr := formatTime(v64, "MICROS", timeLayout, utcTimeLayout, true) + pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setDateData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]int32) - for i := 0; i < readNum; i++ { - dateStr := time.Unix(int64(buf[i])*86400, 0).Format(time.DateOnly) - pp.rows[offset+i][col].SetString(dateStr, "utf8mb4_bin") - } +func (pp *ParquetParser) setDateData(row, col int, val interface{}) { + v32, _ := val.(int32) + dateStr := time.Unix(int64(v32)*86400, 0).Format(time.DateOnly) + pp.rows[row][col].SetString(dateStr, "utf8mb4_bin") } -func (pp *ParquetParser) setDecimalData(readNum, col, offset int) error { +func (pp *ParquetParser) setDecimalData(row, col int, val interface{}) { colTp := pp.dumpers[col].Type() decimal := pp.colMetas[col].decimalMeta - int32buf, _ := pp.dumpers[col].valueBuffer.([]int32) - int64buf, _ := pp.dumpers[col].valueBuffer.([]int64) - fixBuf, _ := pp.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) - byteBuf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) - - for i := 0; i < readNum; i++ { - if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { - var v int64 - if colTp == parquet.Types.Int32 { - v = int64(int32buf[i]) - } else { - v = int64buf[i] - } - if !decimal.IsSet || decimal.Scale == 0 { - pp.rows[offset+i][col].SetInt64(v) - continue - } - minLen := decimal.Scale + 1 - if v < 0 { - minLen++ - } - val := fmt.Sprintf("%0*d", minLen, v) - dotIndex := len(val) - 
int(decimal.Scale) - pp.rows[offset+i][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") - } else if colTp == parquet.Types.FixedLenByteArray { - s := binaryToDecimalStr(fixBuf[i], int(decimal.Scale)) - pp.rows[offset+i][col].SetString(s, "utf8mb4_bin") + if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { + var v int64 + if colTp == parquet.Types.Int32 { + v32, _ := val.(int32) + v = int64(v32) } else { - s := binaryToDecimalStr(byteBuf[i], int(decimal.Scale)) - pp.rows[offset+i][col].SetString(s, "utf8mb4_bin") + v, _ = val.(int64) + } + if !decimal.IsSet || decimal.Scale == 0 { + pp.rows[row][col].SetInt64(v) + return + } + minLen := decimal.Scale + 1 + if v < 0 { + minLen++ } + val := fmt.Sprintf("%0*d", minLen, v) + dotIndex := len(val) - int(decimal.Scale) + pp.rows[row][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") + } else if colTp == parquet.Types.FixedLenByteArray { + v, _ := val.(parquet.FixedLenByteArray) + s := binaryToDecimalStr(v, int(decimal.Scale)) + pp.rows[row][col].SetString(s, "utf8mb4_bin") + } else { + v, _ := val.(parquet.ByteArray) + s := binaryToDecimalStr(v, int(decimal.Scale)) + pp.rows[row][col].SetString(s, "utf8mb4_bin") } - return nil } -func (pp *ParquetParser) setBoolData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]bool) - for i := 0; i < readNum; i++ { - if buf[i] { - pp.rows[offset+i][col].SetUint64(1) - } else { - pp.rows[offset+i][col].SetUint64(0) - } +func (pp *ParquetParser) setBoolData(row, col int, val interface{}) { + boolVal, _ := val.(bool) + if boolVal { + pp.rows[row][col].SetUint64(1) + return } + pp.rows[row][col].SetUint64(0) } -func (pp *ParquetParser) setFloat32Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]float32) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetFloat32(buf[i]) - } +func (pp *ParquetParser) setFloat32Data(row, col int, val interface{}) { + vf32, _ := val.(float32) + 
pp.rows[row][col].SetFloat32(vf32) } -func (pp *ParquetParser) setFloat64Data(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]float64) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetFloat64(buf[i]) - } +func (pp *ParquetParser) setFloat64Data(row, col int, val interface{}) { + vf64, _ := val.(float64) + pp.rows[row][col].SetFloat64(vf64) } -func (pp *ParquetParser) setFixedByteArrayData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]parquet.FixedLenByteArray) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") - } +func (pp *ParquetParser) setFixedByteArrayData(row, col int, val interface{}) { + vfa, _ := val.(parquet.FixedLenByteArray) + pp.rows[row][col].SetString(string(vfa), "utf8mb4_bin") } -func (pp *ParquetParser) setByteArrayData(readNum, col, offset int) { - buf, _ := pp.dumpers[col].valueBuffer.([]parquet.ByteArray) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetString(string(buf[i]), "utf8mb4_bin") - } +func (pp *ParquetParser) setByteArrayData(row, col int, val interface{}) { + vba, _ := val.(parquet.ByteArray) + pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") } -func (pp *ParquetParser) setInt96Data(readNum, col, offset int) { +func (pp *ParquetParser) setInt96Data(row, col int, val interface{}) { // FYI: https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 // INT96 timestamp layout // -------------------------- @@ -590,10 +590,8 @@ func (pp *ParquetParser) setInt96Data(readNum, col, offset int) { // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, // where dt is a negative number but still legal in the context of Go. // But it will cause errors or potential data inconsistency when importing. 
- buf, _ := pp.dumpers[col].valueBuffer.([]parquet.Int96) - for i := 0; i < readNum; i++ { - pp.rows[offset+i][col].SetString(buf[i].ToTime().Format(utcTimeLayout), "utf8mb4_bin") - } + v96, _ := val.(parquet.Int96) + pp.rows[row][col].SetString(v96.ToTime().Format(utcTimeLayout), "utf8mb4_bin") } // Init initializes the Parquet parser and allocate necessary buffers @@ -629,6 +627,10 @@ func (pp *ParquetParser) resetReader() { // ReadRows read several rows internally and store them in the row buffer. func (pp *ParquetParser) ReadRows(num int) (int, error) { + if num > defaultBatchSize { + return 0, errors.Errorf("Number of rows read larger than buffer size") + } + readNum := min(num, pp.totalRows-pp.curRows) if readNum == 0 { return 0, nil @@ -678,57 +680,68 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { ) // Read data into buffers first - for i, dumper := range pp.dumpers { - total = dumper.readNextBatch(int64(num)) - meta := pp.colMetas[i] + for col, dumper := range pp.dumpers { + meta := pp.colMetas[col] physicalTp := dumper.Type() - // If we can't get converted type, just use physical type + var setFunc func(row, col int, val interface{}) if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { switch physicalTp { case parquet.Types.Boolean: - pp.setBoolData(num, i, storeOffset) + setFunc = pp.setBoolData case parquet.Types.Int32: - pp.setInt32Data(num, i, storeOffset) + setFunc = pp.setInt32Data case parquet.Types.Int64: - pp.setInt64Data(num, i, storeOffset) + setFunc = pp.setInt64Data case parquet.Types.Int96: - pp.setInt96Data(num, i, storeOffset) + setFunc = pp.setInt96Data case parquet.Types.Float: - pp.setFloat32Data(num, i, storeOffset) + setFunc = pp.setFloat32Data case parquet.Types.Double: - pp.setFloat64Data(num, i, storeOffset) + setFunc = pp.setFloat64Data case parquet.Types.ByteArray: - pp.setByteArrayData(num, i, storeOffset) + setFunc = 
pp.setByteArrayData case parquet.Types.FixedLenByteArray: - pp.setFixedByteArrayData(num, i, storeOffset) + setFunc = pp.setFixedByteArrayData + } + } else { + switch meta.converted { + case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: + setFunc = pp.setStringData + case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: + setFunc = pp.setInt32Data + case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: + setFunc = pp.setUint32Data + case schema.ConvertedTypes.Int64: + setFunc = pp.setInt64Data + case schema.ConvertedTypes.Uint64: + setFunc = pp.setUint64Data + case schema.ConvertedTypes.TimeMillis: + setFunc = pp.setTimeMillisData + case schema.ConvertedTypes.TimeMicros: + setFunc = pp.setTimeMicrosData + case schema.ConvertedTypes.TimestampMillis: + setFunc = pp.setTimestampMillisData + case schema.ConvertedTypes.TimestampMicros: + setFunc = pp.setTimestampMicrosData + case schema.ConvertedTypes.Date: + setFunc = pp.setDateData + case schema.ConvertedTypes.Decimal: + setFunc = pp.setDecimalData } - continue } - switch meta.converted { - case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: - pp.setStringData(num, i, storeOffset) - case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: - pp.setInt32Data(num, i, storeOffset) - case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: - pp.setUint32Data(num, i, storeOffset) - case schema.ConvertedTypes.Int64: - pp.setInt64Data(num, i, storeOffset) - case schema.ConvertedTypes.Uint64: - pp.setUint64Data(num, i, storeOffset) - case schema.ConvertedTypes.TimeMillis: - pp.setTimeMillisData(num, i, storeOffset) - case schema.ConvertedTypes.TimeMicros: - pp.setTimeMicrosData(num, i, storeOffset) - case schema.ConvertedTypes.TimestampMillis: - 
pp.setTimestampMillisData(num, i, storeOffset) - case schema.ConvertedTypes.TimestampMicros: - pp.setTimestampMicrosData(num, i, storeOffset) - case schema.ConvertedTypes.Date: - pp.setDateData(num, i, storeOffset) - case schema.ConvertedTypes.Decimal: - err = pp.setDecimalData(num, i, storeOffset) + for i := 0; i < num; i++ { + val, ok := dumper.Next() + if !ok { + break + } + + if val == nil { + pp.rows[storeOffset+i][col].SetNull() + continue + } + setFunc(storeOffset+i, col, val) } } @@ -744,16 +757,26 @@ func (pp *ParquetParser) Pos() (pos int64, rowID int64) { // For parquet file, this interface will read and discard the first `pos` rows, // and set the current row ID to `rowID` func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { - pp.lastRow.RowID = rowID - if pos < int64(pp.curRows) { - panic("don't support seek back yet") + curPos, _ := pp.Pos() + if pos < curPos { + return errors.Errorf("Parquet parset doesn't support seek back yet") } // Read and discard these rows - read := int(pos) - pp.curRows - _, err := pp.ReadRows(read) - pp.curIdx, pp.avail = 0, 0 - return errors.Trace(err) + pos = min(pos, int64(pp.totalRows)) + for !(int(pos) >= pp.curRows-pp.avail && int(pos) < pp.curRows) { + numRead, err := pp.ReadRows(defaultBatchSize) + if err != nil { + return errors.Trace(err) + } + if numRead == 0 { + break + } + } + + pp.curIdx = int(pos) - (pp.curRows - pp.avail) + pp.lastRow.RowID = rowID + return nil } // ScannedPos implements the Parser interface. 
From 3a374cef49502a9fc17f737971d64e824684d7b6 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 5 Feb 2025 16:34:59 +0800 Subject: [PATCH 41/93] Fix test --- pkg/lightning/mydump/parquet_parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 331729f275746..60e2367936da4 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -37,7 +37,7 @@ import ( ) const ( - defaultBatchSize = 16 + defaultBatchSize = 128 // if a parquet if small than this threshold, parquet will load the whole file in a byte slice to // optimize the read performance From 7eb8b3e4807f8d423f07fb0d0d21e8aebc01816d Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 6 Feb 2025 14:06:56 +0800 Subject: [PATCH 42/93] Update bazel --- DEPS.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 983b7c76c8c64..4669da2a2bd7d 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -2897,10 +2897,10 @@ def go_deps(): sha256 = "6636c4a48a010844df02886621c32706af2f5e707ad23acb52fe22510b60c822", strip_prefix = "github.com/golang/glog@v1.2.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "http://ats.apps.svc/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.4.zip", + "http://ats.apps.svc/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.4.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.4.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang/glog/com_github_golang_glog-v1.2.4.zip", ], ) go_repository( From abe850f39d788cea259b20326793f658aef528c8 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sat, 8 Feb 2025 16:21:41 +0800 Subject: [PATCH 43/93] Update memory allocation --- lightning/pkg/importer/chunk_process.go | 12 ++--- lightning/pkg/importer/import.go | 2 +- lightning/pkg/importer/table_import.go | 59 +------------------------ pkg/executor/importer/import.go | 19 +++++++- pkg/lightning/mydump/allocator.go | 33 +++++++++++--- pkg/lightning/mydump/parquet_parser.go | 38 ++++++++++++++-- 6 files changed, 85 insertions(+), 78 deletions(-) diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index ec8d9011269cb..14fc0069aef43 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -22,7 +22,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/keyspace" "github.com/pingcap/tidb/pkg/lightning/backend" @@ -51,11 +50,9 @@ import ( // for local backend it encodes and writes KV to local disk // for tidb backend it transforms data into sql and executes them. 
type chunkProcessor struct { - parser mydump.Parser - index int - chunk *checkpoints.ChunkCheckpoint - memLimiter *membuf.Limiter - memoryUsage int + parser mydump.Parser + index int + chunk *checkpoints.ChunkCheckpoint } func newChunkProcessor( @@ -779,8 +776,5 @@ func (*chunkProcessor) maybeSaveCheckpoint( } func (cr *chunkProcessor) close() { - if cr.memLimiter != nil { - cr.memLimiter.Release(cr.memoryUsage) - } _ = cr.parser.Close() } diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index 982d3c47ef658..5b5f033742f90 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -546,7 +546,7 @@ func (rc *Controller) Close() { func (rc *Controller) Run(ctx context.Context) error { failpoint.Inject("beforeRun", func() {}) - setMemoryLimitForParquet(rc.cfg.App.MaxMemoryUsage) + mydump.SetMemoryLimitForParquet(rc.cfg.App.MaxMemoryUsage, true) opts := []func(context.Context) error{ rc.setGlobalVariables, diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 32ab3d8d50eda..e2289244d08d8 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -20,7 +20,6 @@ import ( "database/sql" "encoding/hex" "fmt" - "math" "path/filepath" "slices" "strings" @@ -30,7 +29,6 @@ import ( dmysql "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/failpoint" - "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/lightning/pkg/web" "github.com/pingcap/tidb/pkg/errno" @@ -54,7 +52,6 @@ import ( "github.com/pingcap/tidb/pkg/table/tables" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/extsort" - "github.com/pingcap/tidb/pkg/util/memory" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/multierr" "go.uber.org/zap" @@ -62,20 +59,6 @@ import ( "google.golang.org/grpc/status" ) -var memLimit int // memory limit for parquet reader -var 
memLimiter *membuf.Limiter // memory limiter for parquet reader - -func setMemoryLimitForParquet(percent int) { - memTotal, err := memory.MemTotal() - if err != nil { - // Set limit to int max, which means no limiter - memTotal = math.MaxInt32 - } - memLimit = int(memTotal) * min(percent, 90) / 100 - memLimiter = membuf.NewLimiter(memLimit) - mydump.InitializeGlobalArena(memLimit) -} - // TableImporter is a helper struct to import a table. type TableImporter struct { // The unique table name in the form "`db`.`tbl`". @@ -794,53 +777,13 @@ ChunkLoop: break } - var memoryUsage int - // Limit the concurrency of parquet reader using estimated memory usage. - if chunk.FileMeta.Type == mydump.SourceTypeParquet { - arenaSize := mydump.GetArenaSize() - - memQuota := memLimit / rc.cfg.App.RegionConcurrency / arenaSize * arenaSize - if memQuota > chunk.FileMeta.ParquetMeta.MemoryUsageFull { - memoryUsage = chunk.FileMeta.ParquetMeta.MemoryUsageFull - chunk.FileMeta.ParquetMeta.UseStreaming = false - } else { - memoryUsage = chunk.FileMeta.ParquetMeta.MemoryUsage - chunk.FileMeta.ParquetMeta.UseStreaming = true - } - chunk.FileMeta.ParquetMeta.UseSampleAllocator = false - - // If memory usage is larger than memory limit, set memory usage - // to limit to block other file import. 
- if memoryUsage > memLimit { - tr.logger.Warn("Memory usage larger than limit", - zap.String("file", chunk.FileMeta.Path), - zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), - zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), - zap.Bool("streaming mode", chunk.FileMeta.ParquetMeta.UseStreaming), - ) - memoryUsage = memLimit - } else { - tr.logger.Info("Get memory limit", - zap.String("file", chunk.FileMeta.Path), - zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), - zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), - zap.Bool("streaming mode", chunk.FileMeta.ParquetMeta.UseStreaming), - ) - } - } - + chunk.FileMeta.ParquetMeta.UseSampleAllocator = false cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { setError(err) break } - if chunk.FileMeta.Type == mydump.SourceTypeParquet { - memLimiter.Acquire(memoryUsage) - cr.memLimiter = memLimiter - cr.memoryUsage = memoryUsage - } - restoreWorker := rc.regionWorkers.Apply() wg.Add(1) go func(w *worker.Worker, cr *chunkProcessor) { diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index cdf413ba71401..b534e99f40383 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1131,6 +1131,22 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } } + // Fill memory usage info + if sourceType == mydump.SourceTypeParquet { + _, memoryUsage, _, err := mydump.SampleParquetFileProperty(ctx, *dataFiles[0], e.dataStore) + if err != nil { + return errors.Trace(err) + } + for _, dataFile := range dataFiles { + // To reduce the memory usage, we only use streaming mode to read file. 
+ dataFile.ParquetMeta = mydump.ParquetFileMeta{ + MemoryUsage: memoryUsage, + UseStreaming: true, + UseSampleAllocator: false, + } + } + } + e.dataFiles = dataFiles e.TotalFileSize = totalSize return nil @@ -1225,11 +1241,12 @@ func (e *LoadDataController) GetParser( nil, ) case DataFormatParquet: - parser, err = mydump.NewParquetParser( + parser, err = mydump.NewParquetParserWithMeta( ctx, e.dataStore, reader, dataFileInfo.Remote.Path, + dataFileInfo.Remote.ParquetMeta, ) } if err != nil { diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 072aed70b8db9..b782fe372813b 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -15,6 +15,7 @@ package mydump import ( + "math" "os" "runtime" "runtime/debug" @@ -23,6 +24,7 @@ import ( "unsafe" "github.com/joechenrh/arrow-go/v18/arrow/memory" + "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/pkg/lightning/log" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" @@ -40,6 +42,9 @@ var ( maxArenaCount = 0 // maximum arena count defaultArenaSize = 256 << 20 // size of each arena + memLimit int // memory limit for parquet reader + memLimiter *membuf.Limiter // memory limiter for parquet reader + // AllocSize returns actual allocated size in arena AllocSize func(int) int @@ -47,9 +52,32 @@ var ( GetArena func(int) arena ) +// SetMemoryLimitForParquet set the memory limit for parquet reader and create a global memory pool if necessary. 
+func SetMemoryLimitForParquet(percent int, useGlobal bool) { + memTotal, err := tidbmemory.MemTotal() + if err != nil { + // Set limit to int max, which means no limiter + memTotal = math.MaxInt32 + } + memLimit = int(memTotal) * min(percent, 90) / 100 + memLimiter = membuf.NewLimiter(memLimit) + if useGlobal { + InitializeGlobalArena(memLimit) + } + + log.L().Info("set memory limit", + zap.Int("total memory", int(memTotal)), + zap.Int("memory limit", int(memLimit)), + ) +} + func init() { AllocSize = simpleGetAllocationSize GetArena = getSimpleAllocator + + // This is used for `IMPORT INTO``. + // We set the default memory usage to 40% and don't use a global arena pool. + SetMemoryLimitForParquet(40, false) } // Get the address of a buffer, return 0 if the buffer is nil @@ -61,11 +89,6 @@ func addressOf(buf []byte) uintptr { return uintptr(unsafe.Pointer(&buf[0])) } -// GetArenaSize return the default arena size -func GetArenaSize() int { - return defaultArenaSize -} - // arena is the interface of single allocator type arena interface { allocate(int) []byte diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 60e2367936da4..b78e334d2f26d 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -31,9 +31,11 @@ import ( "github.com/joechenrh/arrow-go/v18/parquet/file" "github.com/joechenrh/arrow-go/v18/parquet/schema" "github.com/pingcap/errors" + "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/types" + "go.uber.org/zap" ) const ( @@ -391,13 +393,16 @@ type ParquetParser struct { curRowGroup int totalRowGroup int - curRowInGroup int - totalRowsInGroup int - curRows int - totalRows int + curRowInGroup int // number of rows read in current group + totalRowsInGroup int // total rows in current group + curRows int // number of rows read in total + totalRows int // total rows in 
this file lastRow Row logger log.Logger + + memoryUsage int + memLimiter *membuf.Limiter } // GetMemoryUsage estimate the memory usage for this file. @@ -799,6 +804,7 @@ func (pp *ParquetParser) Close() error { if a, ok := pp.alloc.(interface{ Close() }); ok { a.Close() } + pp.memLimiter.Release(pp.memoryUsage) return nil } @@ -1034,6 +1040,27 @@ func NewParquetParserWithMeta( path string, meta ParquetFileMeta, ) (*ParquetParser, error) { + // Acquire memory limiter first + var memoryUsage int + if meta.UseSampleAllocator { + memoryUsage = 0 + meta.UseStreaming = true + } else if meta.MemoryUsageFull < defaultArenaSize { + memoryUsage = meta.MemoryUsageFull + meta.UseStreaming = false + } else { + memoryUsage = meta.MemoryUsage + meta.UseStreaming = true + } + memoryUsage = min(memoryUsage, memLimit) + memLimiter.Acquire(memoryUsage) + log.FromContext(ctx).Info("Get memory usage of parquet reader", + zap.String("file", path), + zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), + zap.Bool("streaming mode", meta.UseStreaming), + zap.Bool("use sample allocator", meta.UseSampleAllocator), + ) + wrapper, ok := r.(*parquetFileWrapper) if !ok { wrapper = &parquetFileWrapper{ @@ -1083,6 +1110,7 @@ func NewParquetParserWithMeta( subreaders = append(subreaders, reader) for i := 1; i < fileSchema.NumColumns(); i++ { var newWrapper parquet.ReaderAtSeeker + // If use streaming mode, we will open file for each column. 
if meta.UseStreaming { newWrapper, err = wrapper.Open("") if err != nil { @@ -1104,6 +1132,8 @@ func NewParquetParserWithMeta( columnNames: columnNames, alloc: allocator, logger: log.FromContext(ctx), + memoryUsage: memoryUsage, + memLimiter: memLimiter, } if err := parser.Init(); err != nil { return nil, errors.Trace(err) From 15973efd833be23bc0c1599dbf39cccb9d736e3e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 10 Feb 2025 15:12:15 +0800 Subject: [PATCH 44/93] Update memory control --- pkg/disttask/importinto/proto.go | 1 + pkg/disttask/importinto/wrapper.go | 2 ++ pkg/executor/import_into.go | 22 ++++++++++++++++++---- pkg/executor/importer/import.go | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pkg/disttask/importinto/proto.go b/pkg/disttask/importinto/proto.go index e8c9d3f9794e4..3ce427abbf6d1 100644 --- a/pkg/disttask/importinto/proto.go +++ b/pkg/disttask/importinto/proto.go @@ -169,6 +169,7 @@ type Chunk struct { Type mydump.SourceType Compression mydump.Compression Timestamp int64 + ParquetMeta mydump.ParquetFileMeta } // Checksum records the checksum information. 
diff --git a/pkg/disttask/importinto/wrapper.go b/pkg/disttask/importinto/wrapper.go index 9d7b80ca74fb9..f2c8d892afebc 100644 --- a/pkg/disttask/importinto/wrapper.go +++ b/pkg/disttask/importinto/wrapper.go @@ -30,6 +30,7 @@ func toChunkCheckpoint(chunk Chunk) checkpoints.ChunkCheckpoint { Type: chunk.Type, Compression: chunk.Compression, FileSize: chunk.FileSize, + ParquetMeta: chunk.ParquetMeta, }, Chunk: mydump.Chunk{ PrevRowIDMax: chunk.PrevRowIDMax, @@ -52,5 +53,6 @@ func toChunk(chunkCheckpoint checkpoints.ChunkCheckpoint) Chunk { Type: chunkCheckpoint.FileMeta.Type, Compression: chunkCheckpoint.FileMeta.Compression, Timestamp: chunkCheckpoint.Timestamp, + ParquetMeta: chunkCheckpoint.FileMeta.ParquetMeta, } } diff --git a/pkg/executor/import_into.go b/pkg/executor/import_into.go index 1c0fa35f47b31..8c876a7629bb9 100644 --- a/pkg/executor/import_into.go +++ b/pkg/executor/import_into.go @@ -17,6 +17,7 @@ package executor import ( "context" "fmt" + "runtime/debug" "github.com/google/uuid" "github.com/pingcap/errors" @@ -53,10 +54,11 @@ const unknownImportedRowCount = -1 // ImportIntoExec represents a IMPORT INTO executor. type ImportIntoExec struct { exec.BaseExecutor - selectExec exec.Executor - userSctx sessionctx.Context - controller *importer.LoadDataController - stmt string + selectExec exec.Executor + userSctx sessionctx.Context + controller *importer.LoadDataController + stmt string + prevGCPercentage int plan *plannercore.ImportInto tbl table.Table @@ -111,6 +113,11 @@ func (e *ImportIntoExec) Next(ctx context.Context, req *chunk.Chunk) (err error) return err2 } + // Set GCPercentage to 50 for parquet format to prevent OOM. + if e.controller.Format == importer.DataFormatParquet { + e.prevGCPercentage = debug.SetGCPercent(50) + } + // must use a new session to pre-check, else the stmt in show processlist will be changed. 
newSCtx, err2 := CreateSession(e.userSctx) if err2 != nil { @@ -330,6 +337,13 @@ func (e *ImportIntoExec) importFromSelect(ctx context.Context) error { return nil } +func (e *ImportIntoExec) Close() error { + if e.prevGCPercentage > 0 { + debug.SetGCPercent(e.prevGCPercentage) + } + return e.BaseExecutor.Close() +} + // ImportIntoActionExec represents a import into action executor. type ImportIntoActionExec struct { exec.BaseExecutor diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index b534e99f40383..7cb32f1ce359b 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -60,6 +60,7 @@ import ( "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" "github.com/pingcap/tidb/pkg/util/filter" "github.com/pingcap/tidb/pkg/util/logutil" + "github.com/pingcap/tidb/pkg/util/memory" "github.com/pingcap/tidb/pkg/util/stringutil" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -539,6 +540,9 @@ func (p *Plan) initDefaultOptions(targetNodeCPUCnt int) { if p.DataSourceType == DataSourceTypeQuery { threadCnt = 2 } + if p.Format == DataFormatParquet { + threadCnt = int(math.Max(1, float64(targetNodeCPUCnt)*0.25)) + } p.Checksum = config.OpLevelRequired p.ThreadCnt = threadCnt @@ -1145,6 +1149,16 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { UseSampleAllocator: false, } } + + // Adjust thread count for parquet + memTotal, err := memory.MemTotal() + if err == nil { + limit := min(int(memTotal)*50/100/int(e.dataFiles[0].ParquetMeta.MemoryUsage), e.Plan.ThreadCnt) + limit = max(limit, 1) + log.L().Info("adjust IMPORT INTO thread count for parquet", + zap.Int("thread count", e.Plan.ThreadCnt), zap.Int("after", limit)) + e.Plan.ThreadCnt = limit + } } e.dataFiles = dataFiles From fe8cd9c3c92defe1b30acbcb71b4f0443ddaa777 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 12 Feb 2025 13:24:50 +0800 Subject: [PATCH 45/93] Fix IMPORT INTO parquet --- pkg/executor/importer/import.go | 5 +---- 
pkg/lightning/mydump/parquet_parser.go | 6 +++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 7cb32f1ce359b..70bf33756f243 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -540,9 +540,6 @@ func (p *Plan) initDefaultOptions(targetNodeCPUCnt int) { if p.DataSourceType == DataSourceTypeQuery { threadCnt = 2 } - if p.Format == DataFormatParquet { - threadCnt = int(math.Max(1, float64(targetNodeCPUCnt)*0.25)) - } p.Checksum = config.OpLevelRequired p.ThreadCnt = threadCnt @@ -1153,7 +1150,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Adjust thread count for parquet memTotal, err := memory.MemTotal() if err == nil { - limit := min(int(memTotal)*50/100/int(e.dataFiles[0].ParquetMeta.MemoryUsage), e.Plan.ThreadCnt) + limit := min(int(memTotal)*50/100/int(dataFiles[0].ParquetMeta.MemoryUsage), e.Plan.ThreadCnt) limit = max(limit, 1) log.L().Info("adjust IMPORT INTO thread count for parquet", zap.Int("thread count", e.Plan.ThreadCnt), zap.Int("after", limit)) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index b78e334d2f26d..8f0b019eca805 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -130,6 +130,8 @@ func (dump *columnDumper) SetReader(colReader file.ColumnChunkReader) { dump.reader = colReader dump.valueOffset = 0 dump.levelOffset = 0 + dump.levelsBuffered = 0 + dump.valuesBuffered = 0 } func (dump *columnDumper) readNextBatch() int { @@ -804,7 +806,9 @@ func (pp *ParquetParser) Close() error { if a, ok := pp.alloc.(interface{ Close() }); ok { a.Close() } - pp.memLimiter.Release(pp.memoryUsage) + if pp.memLimiter != nil { + pp.memLimiter.Release(pp.memoryUsage) + } return nil } From 6e42ba4b874d22e2e8187e0e8231d81846cdc367 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 13 Feb 2025 19:15:23 +0800 Subject: 
[PATCH 46/93] Update memory control --- pkg/executor/importer/import.go | 5 +++- pkg/lightning/mydump/allocator.go | 13 +++++++++ pkg/lightning/mydump/loader.go | 1 + pkg/lightning/mydump/parquet_parser.go | 40 ++++++++++++++++++-------- 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 70bf33756f243..d3afac437fea3 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1134,14 +1134,17 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet { - _, memoryUsage, _, err := mydump.SampleParquetFileProperty(ctx, *dataFiles[0], e.dataStore) + _, memoryUsage, memoryUsageFull, err := mydump.SampleParquetFileProperty(ctx, *dataFiles[0], e.dataStore) if err != nil { return errors.Trace(err) } for _, dataFile := range dataFiles { // To reduce the memory usage, we only use streaming mode to read file. + // TODO(joechenrh): set a more proper memory quota dataFile.ParquetMeta = mydump.ParquetFileMeta{ MemoryUsage: memoryUsage, + MemoryUsageFull: memoryUsageFull, + MemoryQuota: mydump.GetMemoryQuota(runtime.NumCPU()), UseStreaming: true, UseSampleAllocator: false, } diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index b782fe372813b..eb673d0762cc3 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -71,6 +71,19 @@ func SetMemoryLimitForParquet(percent int, useGlobal bool) { ) } +// GetMemoryQuota get the memory quota for non-streaming mode read. +// TODO(joechenrh): set a more proper memory quota +func GetMemoryQuota(concurrency int) int { + quotaPerTask := memLimit / concurrency + + // Because other part like encoder also need memory, + // we assume that the reader can use up to 80% of the memroy. + // Maybe we can have a more accurate estimation later. 
+ quotaPerReader := quotaPerTask * 8 / 10 + quotaPerReader = quotaPerReader / defaultArenaSize * defaultArenaSize + return quotaPerReader +} + func init() { AllocSize = simpleGetAllocationSize GetArena = getSimpleAllocator diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 26ccb9e898727..84f4a2e1f1b85 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -91,6 +91,7 @@ type ParquetFileMeta struct { Rows int64 // row count MemoryUsage int // memory usage for streaming mode MemoryUsageFull int // memory usage for non-streaming mode + MemoryQuota int // memory quota for current file reader to use non-streaming mode UseStreaming bool // whether use streaming mode UseSampleAllocator bool // whether use sample allocator } diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 8f0b019eca805..cc4269dcb6db8 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -454,12 +454,23 @@ func (pp *ParquetParser) GetMemoryUsage() (memoryUsageStream, memoryUsageNonStre } readBufferUsageNonStream += AllocSize(defaultBufSize) * len(pp.columnNames) - for i := numColumns; i < 5*numColumns; i += 4 { - dictUsage = max(dictUsage, AllocSize(bufSizes[i])) - dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i+2])) + hasDict := true + if 5*numColumns > len(bufSizes) { + hasDict = false } - for i := 5 * numColumns; i < len(bufSizes); i += 2 { - dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) + + if hasDict { + for i := numColumns; i < 5*numColumns; i += 4 { + dictUsage = max(dictUsage, AllocSize(bufSizes[i])) + dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i+2])) + } + for i := 5 * numColumns; i < len(bufSizes); i += 2 { + dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) + } + } else { + for i := numColumns; i < len(bufSizes); i += 2 { + dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) + } } pageUsage := 
(dataPageUsage + dictUsage) * numColumns @@ -796,6 +807,16 @@ func (pp *ParquetParser) ScannedPos() (int64, error) { // Close closes the parquet file of the parser. // It implements the Parser interface. func (pp *ParquetParser) Close() error { + defer func() { + if a, ok := pp.alloc.(interface{ Close() }); ok { + a.Close() + } + + if pp.memLimiter != nil { + pp.memLimiter.Release(pp.memoryUsage) + } + }() + pp.resetReader() for _, r := range pp.readers { if err := r.Close(); err != nil { @@ -803,12 +824,6 @@ func (pp *ParquetParser) Close() error { } } - if a, ok := pp.alloc.(interface{ Close() }); ok { - a.Close() - } - if pp.memLimiter != nil { - pp.memLimiter.Release(pp.memoryUsage) - } return nil } @@ -1049,7 +1064,7 @@ func NewParquetParserWithMeta( if meta.UseSampleAllocator { memoryUsage = 0 meta.UseStreaming = true - } else if meta.MemoryUsageFull < defaultArenaSize { + } else if meta.MemoryUsageFull < meta.MemoryQuota { memoryUsage = meta.MemoryUsageFull meta.UseStreaming = false } else { @@ -1061,6 +1076,7 @@ func NewParquetParserWithMeta( log.FromContext(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), + zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryUsage>>20)), zap.Bool("streaming mode", meta.UseStreaming), zap.Bool("use sample allocator", meta.UseSampleAllocator), ) From b62cc264e1412023185a9d6d20b6d11964d2df38 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 14 Feb 2025 10:21:19 +0800 Subject: [PATCH 47/93] Update code for IMPORT INTO --- go.mod | 2 +- go.sum | 2 + lightning/pkg/importer/chunk_process.go | 2 +- lightning/pkg/importer/get_pre_info.go | 10 +- lightning/pkg/importer/table_import.go | 1 + pkg/executor/import_into.go | 18 +- pkg/executor/importer/import.go | 17 +- pkg/lightning/mydump/allocator.go | 21 +- pkg/lightning/mydump/loader.go | 66 +--- pkg/lightning/mydump/parquet_parser.go | 379 ++++++++++---------- 
pkg/lightning/mydump/parquet_parser_test.go | 12 +- 11 files changed, 232 insertions(+), 298 deletions(-) diff --git a/go.mod b/go.mod index 3ea2140f307a3..3cbe5853ed721 100644 --- a/go.mod +++ b/go.mod @@ -151,7 +151,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9 +require github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index c5f3890c29a65..6243e80d29b2b 100644 --- a/go.sum +++ b/go.sum @@ -518,6 +518,8 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9 h1:LJGbjOFBrjYubt498ycNLCkXth989t1N9LjWdGuD36U= github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 h1:8QBwC5DOnNBqsXPpeGqD79FcYNTqVR6wDeczNpHLBpA= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/pkg/importer/chunk_process.go b/lightning/pkg/importer/chunk_process.go index 14fc0069aef43..8a57e612173f9 100644 --- a/lightning/pkg/importer/chunk_process.go +++ b/lightning/pkg/importer/chunk_process.go @@ -107,7 +107,7 @@ func openParser( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, 
cfg.TiDB.SQLMode, reader, blockBufSize, ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParserWithMeta(ctx, store, reader, chunk.FileMeta.Path, chunk.FileMeta.ParquetMeta) + parser, err = mydump.NewParquetParser(ctx, store, reader, chunk.FileMeta.Path, chunk.FileMeta.ParquetMeta) if err != nil { return nil, err } diff --git a/lightning/pkg/importer/get_pre_info.go b/lightning/pkg/importer/get_pre_info.go index 77c6d6558c8e4..53d1df76cd926 100644 --- a/lightning/pkg/importer/get_pre_info.go +++ b/lightning/pkg/importer/get_pre_info.go @@ -489,7 +489,10 @@ func (p *PreImportInfoGetterImpl) ReadFirstNRowsByFileMeta(ctx context.Context, case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, dataFileMeta.Path) + parser, err = mydump.NewParquetParser( + ctx, p.srcStorage, reader, + dataFileMeta.Path, mydump.GetDefaultParquetMeta(), + ) if err != nil { return nil, nil, errors.Trace(err) } @@ -659,7 +662,10 @@ func (p *PreImportInfoGetterImpl) sampleDataFromTable( case mydump.SourceTypeSQL: parser = mydump.NewChunkParser(ctx, p.cfg.TiDB.SQLMode, reader, blockBufSize, p.ioWorkers) case mydump.SourceTypeParquet: - parser, err = mydump.NewParquetParser(ctx, p.srcStorage, reader, sampleFile.Path) + parser, err = mydump.NewParquetParser( + ctx, p.srcStorage, reader, + sampleFile.Path, mydump.GetDefaultParquetMeta(), + ) if err != nil { return 0.0, false, errors.Trace(err) } diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index e2289244d08d8..fa6c5f5b92272 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -778,6 +778,7 @@ ChunkLoop: } chunk.FileMeta.ParquetMeta.UseSampleAllocator = false + chunk.FileMeta.ParquetMeta.MemoryQuota = mydump.GetMemoryQuota(rc.cfg.App.RegionConcurrency) cr, err := 
newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { setError(err) diff --git a/pkg/executor/import_into.go b/pkg/executor/import_into.go index 8c876a7629bb9..49588a4a1f7f0 100644 --- a/pkg/executor/import_into.go +++ b/pkg/executor/import_into.go @@ -17,7 +17,6 @@ package executor import ( "context" "fmt" - "runtime/debug" "github.com/google/uuid" "github.com/pingcap/errors" @@ -54,11 +53,10 @@ const unknownImportedRowCount = -1 // ImportIntoExec represents a IMPORT INTO executor. type ImportIntoExec struct { exec.BaseExecutor - selectExec exec.Executor - userSctx sessionctx.Context - controller *importer.LoadDataController - stmt string - prevGCPercentage int + selectExec exec.Executor + userSctx sessionctx.Context + controller *importer.LoadDataController + stmt string plan *plannercore.ImportInto tbl table.Table @@ -113,11 +111,6 @@ func (e *ImportIntoExec) Next(ctx context.Context, req *chunk.Chunk) (err error) return err2 } - // Set GCPercentage to 50 for parquet format to prevent OOM. - if e.controller.Format == importer.DataFormatParquet { - e.prevGCPercentage = debug.SetGCPercent(50) - } - // must use a new session to pre-check, else the stmt in show processlist will be changed. 
newSCtx, err2 := CreateSession(e.userSctx) if err2 != nil { @@ -338,9 +331,6 @@ func (e *ImportIntoExec) importFromSelect(ctx context.Context) error { } func (e *ImportIntoExec) Close() error { - if e.prevGCPercentage > 0 { - debug.SetGCPercent(e.prevGCPercentage) - } return e.BaseExecutor.Close() } diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index d3afac437fea3..54f3c7377882c 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -60,7 +60,6 @@ import ( "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" "github.com/pingcap/tidb/pkg/util/filter" "github.com/pingcap/tidb/pkg/util/logutil" - "github.com/pingcap/tidb/pkg/util/memory" "github.com/pingcap/tidb/pkg/util/stringutil" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -1134,7 +1133,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet { - _, memoryUsage, memoryUsageFull, err := mydump.SampleParquetFileProperty(ctx, *dataFiles[0], e.dataStore) + _, memoryUsage, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) if err != nil { return errors.Trace(err) } @@ -1142,7 +1141,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // To reduce the memory usage, we only use streaming mode to read file. 
// TODO(joechenrh): set a more proper memory quota dataFile.ParquetMeta = mydump.ParquetFileMeta{ - MemoryUsage: memoryUsage, + MemoryUsageStream: memoryUsage, MemoryUsageFull: memoryUsageFull, MemoryQuota: mydump.GetMemoryQuota(runtime.NumCPU()), UseStreaming: true, @@ -1150,15 +1149,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } } - // Adjust thread count for parquet - memTotal, err := memory.MemTotal() - if err == nil { - limit := min(int(memTotal)*50/100/int(dataFiles[0].ParquetMeta.MemoryUsage), e.Plan.ThreadCnt) - limit = max(limit, 1) - log.L().Info("adjust IMPORT INTO thread count for parquet", - zap.Int("thread count", e.Plan.ThreadCnt), zap.Int("after", limit)) - e.Plan.ThreadCnt = limit - } + // TODO(joechnerh): maybe we can adjust thread count for parquet here } e.dataFiles = dataFiles @@ -1255,7 +1246,7 @@ func (e *LoadDataController) GetParser( nil, ) case DataFormatParquet: - parser, err = mydump.NewParquetParserWithMeta( + parser, err = mydump.NewParquetParser( ctx, e.dataStore, reader, diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index eb673d0762cc3..06c968c826359 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -61,9 +61,7 @@ func SetMemoryLimitForParquet(percent int, useGlobal bool) { } memLimit = int(memTotal) * min(percent, 90) / 100 memLimiter = membuf.NewLimiter(memLimit) - if useGlobal { - InitializeGlobalArena(memLimit) - } + InitializeGlobalArena(memLimit, useGlobal) log.L().Info("set memory limit", zap.Int("total memory", int(memTotal)), @@ -113,6 +111,7 @@ type arena interface { type arenaPool struct { arenas chan arena allocated int + reuse bool lock sync.Mutex } @@ -174,7 +173,12 @@ func (ap *arenaPool) put(a arena) { return } - ap.arenas <- a + if ap.reuse { + ap.arenas <- a + } else { + ap.allocated-- + ap.adjustGCPercent() + } } func (ap *arenaPool) free() { @@ -203,7 +207,7 @@ func (alloc *defaultAllocator) init() { 
alloc.allocatedBuf = make(map[uintptr]int, 8) } -func (alloc *defaultAllocator) Allocate(size int) []byte { +func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { for i, a := range alloc.arenas { if buf := a.allocate(size); buf != nil { alloc.allocatedBuf[addressOf(buf)] = i @@ -236,9 +240,9 @@ func (alloc *defaultAllocator) Free(buf []byte) { } } -func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { +func (alloc *defaultAllocator) Reallocate(size int, buf []byte, tp memory.BufferType) []byte { alloc.Free(buf) - return alloc.Allocate(size) + return alloc.Allocate(size, tp) } func (alloc *defaultAllocator) Close() { @@ -262,9 +266,10 @@ func GetDefaultAllocator() memory.Allocator { // InitializeGlobalArena initialize a global arena pool. // If you call this function, remember to call FreeMemory. -func InitializeGlobalArena(size int) { +func InitializeGlobalArena(size int, reuse bool) { maxArenaCount = size / defaultArenaSize globalPool = &arenaPool{} + globalPool.reuse = reuse globalPool.arenas = make(chan arena, maxArenaCount) } diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 84f4a2e1f1b85..32c3e72785220 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -89,7 +89,7 @@ type MDTableMeta struct { // ParquetFileMeta contains some analyzed metadata for a parquet file by MyDumper Loader. 
type ParquetFileMeta struct { Rows int64 // row count - MemoryUsage int // memory usage for streaming mode + MemoryUsageStream int // memory usage for streaming mode MemoryUsageFull int // memory usage for non-streaming mode MemoryQuota int // memory quota for current file reader to use non-streaming mode UseStreaming bool // whether use streaming mode @@ -558,7 +558,7 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size s.sampledParquetRowSizes[tableName], s.sampledParquetMemoryUsage[tableName], s.sampledParquetMemoryUsageFull[tableName], - err = SampleParquetFileProperty(ctx, info.FileMeta, s.loader.GetStore()) + err = SampleStatisticsFromParquet(ctx, info.FileMeta, s.loader.GetStore()) if err != nil { logger.Error("fail to sample parquet row size", zap.String("category", "loader"), zap.String("schema", res.Schema), zap.String("table", res.Name), @@ -579,7 +579,7 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size if m, ok := metric.FromContext(ctx); ok { m.RowsCounter.WithLabelValues(metric.StateTotalRestore, tableName).Add(float64(totalRowCount)) } - info.FileMeta.ParquetMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] + info.FileMeta.ParquetMeta.MemoryUsageStream = s.sampledParquetMemoryUsage[tableName] info.FileMeta.ParquetMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] info.FileMeta.ParquetMeta.UseStreaming = true info.FileMeta.ParquetMeta.UseSampleAllocator = false @@ -867,63 +867,3 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store } return float64(tot) / float64(pos), nil } - -// SampleParquetFileProperty samples row size and memory usage of the parquet file. 
-func SampleParquetFileProperty( - ctx context.Context, - fileMeta SourceFileMeta, - store storage.ExternalStorage, -) ( - avgRowSize float64, - memoryUsage int, - memoryUsageFull int, - err error, -) { - totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta) - if totalRowCount == 0 || err != nil { - return 0, 0, 0, err - } - - reader, err := store.Open(ctx, fileMeta.Path, nil) - if err != nil { - return 0, 0, 0, err - } - - parquetMeta := fileMeta.ParquetMeta - parquetMeta.UseStreaming = true - parquetMeta.UseSampleAllocator = true - parser, err := NewParquetParserWithMeta(ctx, store, reader, fileMeta.Path, parquetMeta) - if err != nil { - //nolint: errcheck - reader.Close() - return 0, 0, 0, err - } - //nolint: errcheck - defer parser.Close() - - var ( - rowSize int64 - rowCount int64 - ) - for { - err = parser.ReadRow() - if err != nil { - if errors.Cause(err) == io.EOF { - break - } - return 0, 0, 0, err - } - lastRow := parser.LastRow() - rowCount++ - rowSize += int64(lastRow.Length) - parser.RecycleRow(lastRow) - if rowSize > maxSampleParquetDataSize || rowCount > maxSampleParquetRowCount { - break - } - } - - avgRowSize = float64(rowSize) / float64(rowCount) - memoryUsage, memoryUsageFull = parser.GetMemoryUsage() - - return avgRowSize, memoryUsage, memoryUsageFull, nil -} diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index cc4269dcb6db8..d5284e789cc25 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -23,7 +23,6 @@ import ( "math/big" "reflect" "strings" - "sync/atomic" "time" "github.com/joechenrh/arrow-go/v18/arrow/memory" @@ -50,29 +49,6 @@ const ( timeLayout = "2006-01-02 15:04:05.999999" ) -type allocatorWithStats struct { - baseAllocator memory.Allocator - allocated atomic.Int64 -} - -func (a *allocatorWithStats) Allocate(size int) []byte { - b := a.baseAllocator.Allocate(size) - a.allocated.Add(int64(cap(b))) - return b -} - -func (a 
*allocatorWithStats) Reallocate(size int, b []byte) []byte { - return a.baseAllocator.Reallocate(size, b) -} - -func (a *allocatorWithStats) Free(b []byte) { - a.baseAllocator.Free(b) -} - -func (a *allocatorWithStats) Allocated() int64 { - return a.allocated.Load() -} - type columnDumper struct { reader file.ColumnChunkReader batchSize int64 @@ -305,18 +281,14 @@ func (pf *parquetFileWrapper) InitBuffer(bufSize int) { } func (pf *parquetFileWrapper) readNBytes(p []byte) (int, error) { - read := 0 - for read < len(p) { - n, err := pf.Read(p[read:]) - read += n - if err != nil { - return read, err - } + n, err := io.ReadFull(pf, p) + if err != nil && err != io.EOF { + return 0, errors.Trace(err) } - if read != len(p) { - return read, errors.Errorf("Error reading %d bytes, only read %d bytes", len(p), read) + if n != len(p) { + return n, errors.Errorf("Error reading %d bytes, only read %d bytes", len(p), n) } - return read, nil + return n, nil } // ReadAt implemement ReaderAt interface @@ -407,78 +379,6 @@ type ParquetParser struct { memLimiter *membuf.Limiter } -// GetMemoryUsage estimate the memory usage for this file. -func (pp *ParquetParser) GetMemoryUsage() (memoryUsageStream, memoryUsageNonStream int) { - // Initialize column reader - if pp.dumpers[0].reader == nil { - if err := pp.ReadRow(); err != nil { - return math.MaxInt, math.MaxInt - } - } - - // All the columns share the same data page size, - // so we only need to read one column chunk. - dumper := pp.dumpers[0] - for { - if _, ok := dumper.Next(); !ok { - break - } - } - - alloc, ok := pp.alloc.(*sampleAllocator) - if !ok { - return 0, 0 - } - bufSizes := alloc.allocated - - /* - * We have collected all the allocations, and the allocation order are: - * read buffer(repeat n times), decompressed dict buffer, compressed buffer, decompressed data page buffer, compressed data page buffer, ... 
- * since the compressed buffer is released after decompression, we estimate the memory usage as: - * (AllocSize(decompressed dict buffer) + AllocSize(decompressed data page buffer) + AllocSize(read buffer) + AllocSize(parquet read buffer)) * num_cols - */ - - numColumns := len(pp.columnNames) - dictUsage := 0 - dataPageUsage := 0 - readBufferUsageStream := (AllocSize(bufSizes[0]) + AllocSize(defaultBufSize)) * numColumns - - readBufferUsageNonStream := 0 - meta := pp.readers[0].MetaData() - for _, rg := range meta.RowGroups { - currUsage := 0 - for _, c := range rg.Columns { - currUsage += AllocSize(int(c.MetaData.GetTotalCompressedSize())) - } - readBufferUsageNonStream = max(readBufferUsageNonStream, currUsage) - } - readBufferUsageNonStream += AllocSize(defaultBufSize) * len(pp.columnNames) - - hasDict := true - if 5*numColumns > len(bufSizes) { - hasDict = false - } - - if hasDict { - for i := numColumns; i < 5*numColumns; i += 4 { - dictUsage = max(dictUsage, AllocSize(bufSizes[i])) - dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i+2])) - } - for i := 5 * numColumns; i < len(bufSizes); i += 2 { - dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) - } - } else { - for i := numColumns; i < len(bufSizes); i += 2 { - dataPageUsage = max(dataPageUsage, AllocSize(bufSizes[i])) - } - } - - pageUsage := (dataPageUsage + dictUsage) * numColumns - - return roundUp(pageUsage+readBufferUsageStream, defaultArenaSize), - roundUp(pageUsage+readBufferUsageNonStream, defaultArenaSize) -} - func (pp *ParquetParser) setStringData(row, col int, val interface{}) { vba, _ := val.(parquet.ByteArray) pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") @@ -962,97 +862,56 @@ func ReadParquetFileRowCountByFile( return reader.MetaData().NumRows, nil } -// NewParquetParser generates a parquet parser. 
-func NewParquetParser( - ctx context.Context, - store storage.ExternalStorage, - r storage.ReadSeekCloser, - path string, -) (*ParquetParser, error) { - wrapper, ok := r.(*parquetFileWrapper) - if !ok { - wrapper = &parquetFileWrapper{ - ReadSeekCloser: r, - store: store, - ctx: ctx, - path: path, - } - wrapper.InitBuffer(defaultBufSize) - } - - allocator := GetDefaultAllocator() - prop := parquet.NewReaderProperties(allocator) - prop.BufferedStreamEnabled = true - - reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) - if err != nil { - return nil, errors.Trace(err) - } - - fileSchema := reader.MetaData().Schema - columnMetas := make([]convertedType, fileSchema.NumColumns()) - columnNames := make([]string, 0, fileSchema.NumColumns()) - - for i := range columnMetas { - desc := reader.MetaData().Schema.Column(i) - columnNames = append(columnNames, strings.ToLower(desc.Name())) - - logicalType := desc.LogicalType() - if logicalType.IsValid() { - columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() - } else { - columnMetas[i].converted = desc.ConvertedType() - pnode, _ := desc.SchemaNode().(*schema.PrimitiveNode) - columnMetas[i].decimalMeta = pnode.DecimalMetadata() - } - } - - subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) - subreaders = append(subreaders, reader) - for i := 1; i < fileSchema.NumColumns(); i++ { - newWrapper, err := wrapper.Open("") - if err != nil { - return nil, errors.Trace(err) - } - reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) - if err != nil { - return nil, errors.Trace(err) - } - subreaders = append(subreaders, reader) - } - - parser := &ParquetParser{ - readers: subreaders, - colMetas: columnMetas, - columnNames: columnNames, - alloc: allocator, - logger: log.FromContext(ctx), - } - if err := parser.Init(); err != nil { - return nil, errors.Trace(err) - } - - return parser, nil -} - +// sampleAllocator is 
used to collection memory usage in parquet reader. type sampleAllocator struct { - allocated []int + maxCompressedLength int + maxDataPage int + totalDictPage int + otherAllocated int } -func (sa *sampleAllocator) Allocate(size int) []byte { - sa.allocated = append(sa.allocated, size) +func (sa *sampleAllocator) Allocate(size int, tp memory.BufferType) []byte { + size = AllocSize(size) + switch tp { + case memory.BufferCompressed: + sa.maxCompressedLength = max(sa.maxCompressedLength, size) + case memory.BufferDataPage: + sa.maxDataPage = max(sa.maxDataPage, size) + case memory.BufferDictionary: + // For each row group, we need to store all dictionary pages to decode data page. + sa.totalDictPage += size + default: + sa.otherAllocated += size + } return make([]byte, size) } func (*sampleAllocator) Free([]byte) {} -func (sa *sampleAllocator) Reallocate(size int, _ []byte) []byte { - sa.allocated = append(sa.allocated, size) - return make([]byte, size) +func (sa *sampleAllocator) Reallocate(size int, _ []byte, tp memory.BufferType) []byte { + return sa.Allocate(size, tp) } -// NewParquetParserWithMeta generates a parquet parser. -func NewParquetParserWithMeta( +func (sa *sampleAllocator) reset() { + sa.maxCompressedLength = 0 + sa.maxDataPage = 0 + sa.totalDictPage = 0 + sa.otherAllocated = 0 +} + +// GetDefaultParquetMeta return a default file meta +func GetDefaultParquetMeta() ParquetFileMeta { + return ParquetFileMeta{ + MemoryUsageStream: 0, + MemoryUsageFull: math.MaxInt32, + MemoryQuota: 0, + UseSampleAllocator: true, + UseStreaming: true, + } +} + +// NewParquetParser generates a parquet parser. 
+func NewParquetParser( ctx context.Context, store storage.ExternalStorage, r storage.ReadSeekCloser, @@ -1064,11 +923,11 @@ func NewParquetParserWithMeta( if meta.UseSampleAllocator { memoryUsage = 0 meta.UseStreaming = true - } else if meta.MemoryUsageFull < meta.MemoryQuota { + } else if meta.MemoryUsageFull <= meta.MemoryQuota || meta.MemoryUsageFull == meta.MemoryUsageStream { memoryUsage = meta.MemoryUsageFull meta.UseStreaming = false } else { - memoryUsage = meta.MemoryUsage + memoryUsage = meta.MemoryUsageStream meta.UseStreaming = true } memoryUsage = min(memoryUsage, memLimit) @@ -1076,7 +935,9 @@ func NewParquetParserWithMeta( log.FromContext(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), - zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryUsage>>20)), + zap.String("memory usage full", fmt.Sprintf("%d MB", meta.MemoryUsageFull>>20)), + zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryQuota>>20)), + zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), zap.Bool("streaming mode", meta.UseStreaming), zap.Bool("use sample allocator", meta.UseSampleAllocator), ) @@ -1161,3 +1022,141 @@ func NewParquetParserWithMeta( return parser, nil } + +// SampleStatisticsFromParquet samples row size and memory usage of the parquet file. 
+func SampleStatisticsFromParquet( + ctx context.Context, + fileMeta SourceFileMeta, + store storage.ExternalStorage, +) ( + avgRowSize float64, + memoryUsage int, + memoryUsageFull int, + err error, +) { + r, err := store.Open(ctx, fileMeta.Path, nil) + if err != nil { + return 0, 0, 0, err + } + + wrapper := &parquetFileWrapper{ + ReadSeekCloser: r, + store: store, + ctx: ctx, + path: fileMeta.Path, + } + wrapper.InitBuffer(defaultBufSize) + + prop := parquet.NewReaderProperties(nil) + prop.BufferedStreamEnabled = true + reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) + if err != nil { + return 0, 0, 0, errors.Trace(err) + } + + fileSchema := reader.MetaData().Schema + columnMetas := make([]convertedType, fileSchema.NumColumns()) + columnNames := make([]string, 0, fileSchema.NumColumns()) + + for i := range columnMetas { + desc := reader.MetaData().Schema.Column(i) + columnNames = append(columnNames, strings.ToLower(desc.Name())) + + logicalType := desc.LogicalType() + if logicalType.IsValid() { + columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() + } else { + columnMetas[i].converted = desc.ConvertedType() + pnode, _ := desc.SchemaNode().(*schema.PrimitiveNode) + columnMetas[i].decimalMeta = pnode.DecimalMetadata() + } + } + + subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) + allSampleAllocators := make([]*sampleAllocator, 0, fileSchema.NumColumns()) + for i := 0; i < fileSchema.NumColumns(); i++ { + newWrapper, err := wrapper.Open("") + if err != nil { + return 0, 0, 0, errors.Trace(err) + } + + alloc := &sampleAllocator{} + prop := parquet.NewReaderProperties(alloc) + prop.BufferedStreamEnabled = true + allSampleAllocators = append(allSampleAllocators, alloc) + + reader, err := file.NewParquetReader( + newWrapper, + file.WithReadProps(prop), + file.WithMetadata(reader.MetaData()), + ) + + if err != nil { + return 0, 0, 0, errors.Trace(err) + } + subreaders = append(subreaders, reader) + } 
+ + parser := &ParquetParser{ + readers: subreaders, + colMetas: columnMetas, + columnNames: columnNames, + logger: log.FromContext(ctx), + base64: fileMeta.ParquetMeta.Base64, + } + if err := parser.Init(); err != nil { + return 0, 0, 0, errors.Trace(err) + } + + //nolint: errcheck + defer parser.Close() + + var ( + rowSize int64 + rowCount int64 + ) + + if reader.NumRowGroups() == 0 || reader.MetaData().RowGroups[0].NumRows == 0 { + return 0, 0, 0, nil + } + + totalReadRows := reader.MetaData().RowGroups[0].NumRows + for i := 0; i < int(totalReadRows); i++ { + err = parser.ReadRow() + if err != nil { + if errors.Cause(err) == io.EOF { + break + } + return 0, 0, 0, err + } + lastRow := parser.LastRow() + rowCount++ + rowSize += int64(lastRow.Length) + parser.RecycleRow(lastRow) + } + + avgRowSize = float64(rowSize) / float64(rowCount) + + memoryUsageStream, memoryUsageFull := 0, 0 + for _, alloc := range allSampleAllocators { + memoryUsageFull += alloc.maxDataPage + memoryUsageFull += alloc.totalDictPage + memoryUsageStream += alloc.otherAllocated + memoryUsageStream += alloc.maxDataPage + memoryUsageStream += alloc.totalDictPage + } + + pageBufferFull := 0 + for _, rg := range parser.readers[0].MetaData().RowGroups { + totalUsage := 0 + for _, c := range rg.Columns { + totalUsage += AllocSize(int(c.MetaData.GetTotalCompressedSize())) + } + pageBufferFull = max(pageBufferFull, totalUsage) + } + memoryUsageFull += pageBufferFull + + memoryUsageStream = roundUp(memoryUsageStream, defaultArenaSize) + memoryUsageFull = roundUp(memoryUsageFull, defaultArenaSize) + return avgRowSize, memoryUsageStream, memoryUsageFull, nil +} diff --git a/pkg/lightning/mydump/parquet_parser_test.go b/pkg/lightning/mydump/parquet_parser_test.go index d4fcfc34cbe4f..2d8c141554dc1 100644 --- a/pkg/lightning/mydump/parquet_parser_test.go +++ b/pkg/lightning/mydump/parquet_parser_test.go @@ -59,7 +59,7 @@ func TestParquetParser(t *testing.T) { require.NoError(t, err) r, err := 
store.Open(context.TODO(), name, nil) require.NoError(t, err) - reader, err := NewParquetParser(context.TODO(), store, r, name) + reader, err := NewParquetParser(context.TODO(), store, r, name, GetDefaultParquetMeta()) require.NoError(t, err) defer reader.Close() @@ -136,7 +136,7 @@ func TestParquetVariousTypes(t *testing.T) { require.NoError(t, err) r, err := store.Open(context.TODO(), name, nil) require.NoError(t, err) - reader, err := NewParquetParser(context.TODO(), store, r, name) + reader, err := NewParquetParser(context.TODO(), store, r, name, GetDefaultParquetMeta()) require.NoError(t, err) defer reader.Close() @@ -192,7 +192,7 @@ func TestParquetVariousTypes(t *testing.T) { r, err = store.Open(context.TODO(), fileName, nil) require.NoError(t, err) - reader, err = NewParquetParser(context.TODO(), store, r, fileName) + reader, err = NewParquetParser(context.TODO(), store, r, fileName, GetDefaultParquetMeta()) require.NoError(t, err) defer reader.Close() @@ -232,7 +232,7 @@ func TestParquetVariousTypes(t *testing.T) { r, err = store.Open(context.TODO(), fileName, nil) require.NoError(t, err) - reader, err = NewParquetParser(context.TODO(), store, r, fileName) + reader, err = NewParquetParser(context.TODO(), store, r, fileName, GetDefaultParquetMeta()) require.NoError(t, err) defer reader.Close() @@ -253,7 +253,7 @@ func TestParquetAurora(t *testing.T) { fileName := "test.parquet" r, err := store.Open(context.TODO(), fileName, nil) require.NoError(t, err) - parser, err := NewParquetParser(context.TODO(), store, r, fileName) + parser, err := NewParquetParser(context.TODO(), store, r, fileName, GetDefaultParquetMeta()) require.NoError(t, err) require.Equal(t, []string{"id", "val1", "val2", "d1", "d2", "d3", "d4", "d5", "d6"}, parser.Columns()) @@ -310,7 +310,7 @@ func TestHiveParquetParser(t *testing.T) { require.NoError(t, err) r, err := store.Open(context.TODO(), name, nil) require.NoError(t, err) - reader, err := NewParquetParser(context.TODO(), store, r, 
name) + reader, err := NewParquetParser(context.TODO(), store, r, name, GetDefaultParquetMeta()) require.NoError(t, err) defer reader.Close() // UTC+0:00 From f51fac367d76130a9a0b99c64cef84407d438b41 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sat, 15 Feb 2025 12:54:15 +0800 Subject: [PATCH 48/93] [test] Add base64 try fix OOM for global sort --- .../importinto/encode_and_sort_operator.go | 5 ++++ pkg/disttask/importinto/task_executor.go | 2 ++ pkg/executor/importer/import.go | 29 +++++++++++++++++++ pkg/lightning/backend/external/merge.go | 2 +- pkg/lightning/config/config.go | 11 +++++++ pkg/lightning/mydump/loader.go | 1 + pkg/lightning/mydump/parquet_parser.go | 12 ++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index de6f54b8cc2b0..07e23ea291348 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -229,6 +229,11 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) dataKVMemSizePerCon, perIndexKVMemSizePerCon uint64) { indexKVGroupCnt := getNumOfIndexGenKV(plan.DesiredTableInfo) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) + + // For parquet file format, we allocate 50% of the memory to file reader. + if plan.Format == "parquet" { + memPerCon /= 2 + } // we use half of the total available memory for data writer, and the other half // for encoding and other stuffs, it's an experience value, might not optimal. 
// Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index e78eba90584cb..a7aa0e3ec6712 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -302,6 +302,7 @@ func (m *mergeSortStepExecutor) Init(ctx context.Context) error { } m.controller = controller dataKVMemSizePerCon, perIndexKVMemSizePerCon := getWriterMemorySizeLimit(m.GetResource(), &m.taskMeta.Plan) + // TODO(joechenrh): set MaxMergingFilesPerThread here? m.dataKVPartSize = max(external.MinUploadPartSize, int64(dataKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) m.indexKVPartSize = max(external.MinUploadPartSize, int64(perIndexKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) @@ -338,6 +339,7 @@ func (m *mergeSortStepExecutor) RunSubtask(ctx context.Context, subtask *proto.S if sm.KVGroup != dataKVGroup { partSize = m.indexKVPartSize } + // TODO(joechenrh): set MaxMergingFilesPerThread here? 
err = external.MergeOverlappingFiles( logutil.WithFields(ctx, zap.String("kv-group", sm.KVGroup), zap.Int64("subtask-id", subtask.ID)), sm.DataFiles, diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 54f3c7377882c..78e48707f01ab 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -86,6 +86,7 @@ const ( fieldsEnclosedByOption = "fields_enclosed_by" fieldsEscapedByOption = "fields_escaped_by" fieldsDefinedNullByOption = "fields_defined_null_by" + fieldsEncodedByOption = "fields_encoded_by" linesTerminatedByOption = "lines_terminated_by" skipRowsOption = "skip_rows" splitFileOption = "split_file" @@ -112,6 +113,7 @@ var ( fieldsEnclosedByOption: true, fieldsEscapedByOption: true, fieldsDefinedNullByOption: true, + fieldsEncodedByOption: true, linesTerminatedByOption: true, skipRowsOption: true, splitFileOption: false, @@ -217,6 +219,8 @@ type Plan struct { Charset *string ImportantSysVars map[string]string + FieldsEncodedBy config.FieldEncodeType + // used for LOAD DATA and CSV format of IMPORT INTO FieldNullDef []string // this is not used in IMPORT INTO @@ -512,6 +516,19 @@ func (e *LoadDataController) checkFieldParams() error { if e.Format != DataFormatCSV && e.Format != DataFormatParquet && e.Format != DataFormatSQL { return exeerrors.ErrLoadDataUnsupportedFormat.GenWithStackByArgs(e.Format) } + if e.FieldsEncodedBy == config.FieldEncodeBase64 { + if e.Format == DataFormatCSV { + if e.FieldsEnclosedBy != "" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_enclosed_by must be empty when fields_encoded_by is 'base64'") + } + if e.FieldsEscapedBy != "" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_escaped_by must be empty when fields_encoded_by is 'base64'") + } + if e.Charset != nil && *e.Charset != "binary" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("character_set must be 'binary' when fields_encoded_by is 
'base64'") + } + } + } } else { if e.NullValueOptEnclosed && len(e.FieldsEnclosedBy) == 0 { return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("must specify FIELDS [OPTIONALLY] ENCLOSED BY when use NULL DEFINED BY OPTIONALLY ENCLOSED") @@ -651,6 +668,17 @@ func (p *Plan) initOptions(ctx context.Context, seCtx sessionctx.Context, option } p.FieldNullDef = []string{v} } + if opt, ok := specifiedOptions[fieldsEncodedByOption]; ok { + v, err := optAsString(opt) + if err != nil { + return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) + } + v = strings.ToLower(v) + if config.FieldEncodeType(v) != config.FieldEncodeBase64 { + return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) + } + p.FieldsEncodedBy = config.FieldEncodeType(v) + } if opt, ok := specifiedOptions[linesTerminatedByOption]; ok { v, err := optAsString(opt) // cannot set terminator to empty string explicitly @@ -1146,6 +1174,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { MemoryQuota: mydump.GetMemoryQuota(runtime.NumCPU()), UseStreaming: true, UseSampleAllocator: false, + Base64: e.FieldsEncodedBy == config.FieldEncodeBase64, } } diff --git a/pkg/lightning/backend/external/merge.go b/pkg/lightning/backend/external/merge.go index 503b037d542a6..cc0035c186811 100644 --- a/pkg/lightning/backend/external/merge.go +++ b/pkg/lightning/backend/external/merge.go @@ -30,7 +30,7 @@ var ( // MaxMergingFilesPerThread is the maximum number of files that can be merged by a // single thread. This value comes from the fact that 16 threads are ok to merge 4k // files in parallel, so we set it to 250. - MaxMergingFilesPerThread = 250 + MaxMergingFilesPerThread = 120 // MinUploadPartSize is the minimum size of each part when uploading files to // external storage, which is 5MiB for both S3 and GCS. 
MinUploadPartSize int64 = 5 * units.MiB diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index b2b862fda8c70..ba303dae956d9 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -803,6 +803,17 @@ func (s *StringOrStringSlice) UnmarshalTOML(in any) error { return nil } +// FieldEncodeType is the type of encoding for a CSV field. +type FieldEncodeType string + +const ( + // FieldEncodeNone means no special encoding. + FieldEncodeNone FieldEncodeType = "" + // FieldEncodeBase64 means the field is encoded in base64. + // this encoding also implies some constraints on other parameters + FieldEncodeBase64 FieldEncodeType = "base64" +) + // CSVConfig is the config for CSV files. type CSVConfig struct { // Separator, Delimiter and Terminator should all be in utf8mb4 encoding. diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 32c3e72785220..0760874b9445a 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -94,6 +94,7 @@ type ParquetFileMeta struct { MemoryQuota int // memory quota for current file reader to use non-streaming mode UseStreaming bool // whether use streaming mode UseSampleAllocator bool // whether use sample allocator + Base64 bool } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. 
diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index d5284e789cc25..2e1d27d38f74c 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -17,6 +17,7 @@ package mydump import ( "bytes" "context" + "encoding/base64" "fmt" "io" "math" @@ -375,6 +376,8 @@ type ParquetParser struct { lastRow Row logger log.Logger + base64 bool + memoryUsage int memLimiter *membuf.Limiter } @@ -660,6 +663,14 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { continue } setFunc(storeOffset+i, col, val) + if pp.base64 { + var decoded []byte + decoded, err = base64.StdEncoding.DecodeString(pp.rows[storeOffset+i][col].GetString()) + if err != nil { + return 0, err + } + pp.rows[storeOffset+i][col].SetString(string(decoded), "utf8mb4_bin") + } } } @@ -1013,6 +1024,7 @@ func NewParquetParser( columnNames: columnNames, alloc: allocator, logger: log.FromContext(ctx), + base64: meta.Base64, memoryUsage: memoryUsage, memLimiter: memLimiter, } From 4c49aa56f295f3c29f820d1fb895f05726ad382e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 13:48:45 +0800 Subject: [PATCH 49/93] update bazel --- DEPS.bzl | 12 ++++++------ lightning/pkg/importer/BUILD.bazel | 2 -- pkg/lightning/mydump/BUILD.bazel | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 4669da2a2bd7d..654cdfc9b361e 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -4199,13 +4199,13 @@ def go_deps(): name = "com_github_joechenrh_arrow_go_v18", build_file_proto_mode = "disable_global", importpath = "github.com/joechenrh/arrow-go/v18", - sha256 = "fd8f195bd73fd66342c6bf66c3ce6977bcc7544a7aab7fc2e2002afbcf95c7a9", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250107060625-e99480fe0ed9", + sha256 = "f0cfa403295cb81867af7282c5593654c2c659751460dc5d183560528c479fde", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250215045230-203e420514b7", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250107060625-e99480fe0ed9.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", ], ) go_repository( diff --git a/lightning/pkg/importer/BUILD.bazel b/lightning/pkg/importer/BUILD.bazel index f6824ae230677..50baddbc915ee 100644 --- a/lightning/pkg/importer/BUILD.bazel +++ b/lightning/pkg/importer/BUILD.bazel @@ -20,7 +20,6 @@ go_library( visibility = ["//visibility:public"], deps = [ "//br/pkg/errors", - "//br/pkg/membuf", "//br/pkg/pdutil", "//br/pkg/storage", "//br/pkg/streamhelper", @@ -75,7 +74,6 @@ go_library( "//pkg/util/engine", "//pkg/util/etcd", "//pkg/util/extsort", - "//pkg/util/memory", "//pkg/util/redact", "//pkg/util/regexpr-router", "//pkg/util/set", diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 27a5e9235340e..abbc561f6ee85 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ 
b/pkg/lightning/mydump/BUILD.bazel @@ -20,6 +20,7 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/lightning/mydump", visibility = ["//visibility:public"], deps = [ + "//br/pkg/membuf", "//br/pkg/storage", "//pkg/config", "//pkg/errno", From 97fa5f9fb42eaa2663fcf57b40e4b6ea2b625861 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 13:59:50 +0800 Subject: [PATCH 50/93] update bazel --- pkg/lightning/mydump/BUILD.bazel | 2 +- pkg/lightning/mydump/allocator.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index abbc561f6ee85..7457e881d5176 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -20,13 +20,13 @@ go_library( importpath = "github.com/pingcap/tidb/pkg/lightning/mydump", visibility = ["//visibility:public"], deps = [ - "//br/pkg/membuf", "//br/pkg/storage", "//pkg/config", "//pkg/errno", "//pkg/lightning/common", "//pkg/lightning/config", "//pkg/lightning/log", + "//pkg/lightning/membuf", "//pkg/lightning/metric", "//pkg/lightning/worker", "//pkg/parser", diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 06c968c826359..a860950399eb1 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -24,8 +24,8 @@ import ( "unsafe" "github.com/joechenrh/arrow-go/v18/arrow/memory" - "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/lightning/membuf" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" ) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 2e1d27d38f74c..294f458883236 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -31,9 +31,9 @@ import ( "github.com/joechenrh/arrow-go/v18/parquet/file" 
"github.com/joechenrh/arrow-go/v18/parquet/schema" "github.com/pingcap/errors" - "github.com/pingcap/tidb/br/pkg/membuf" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/types" "go.uber.org/zap" ) From 291d7a9ab93c37f763381546487f86a622797ab5 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 14:32:06 +0800 Subject: [PATCH 51/93] update build --- pkg/lightning/mydump/parquet_parser.go | 42 +++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 294f458883236..1187884b55e63 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -60,7 +60,7 @@ type columnDumper struct { levelsBuffered int64 defLevels []int16 repLevels []int16 - values []interface{} + values []any valueBuffer any } @@ -92,7 +92,7 @@ func createcolumnDumper(tp parquet.Type) *columnDumper { batchSize: int64(batchSize), defLevels: make([]int16, batchSize), repLevels: make([]int16, batchSize), - values: make([]interface{}, batchSize), + values: make([]any, batchSize), valueBuffer: valueBuffer, } } @@ -149,7 +149,7 @@ func (dump *columnDumper) hasNext() bool { } // Next reads next value from the reader -func (dump *columnDumper) Next() (interface{}, bool) { +func (dump *columnDumper) Next() (any, bool) { if dump.levelOffset == dump.levelsBuffered { if !dump.hasNext() { return nil, false @@ -382,62 +382,62 @@ type ParquetParser struct { memLimiter *membuf.Limiter } -func (pp *ParquetParser) setStringData(row, col int, val interface{}) { +func (pp *ParquetParser) setStringData(row, col int, val any) { vba, _ := val.(parquet.ByteArray) pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") } -func (pp *ParquetParser) setInt32Data(row, col int, val interface{}) { +func (pp *ParquetParser) setInt32Data(row, col int, val 
any) { v32, _ := val.(int32) pp.rows[row][col].SetInt64(int64(v32)) } -func (pp *ParquetParser) setUint32Data(row, col int, val interface{}) { +func (pp *ParquetParser) setUint32Data(row, col int, val any) { v64, _ := val.(int64) pp.rows[row][col].SetUint64(uint64(v64)) } -func (pp *ParquetParser) setInt64Data(row, col int, val interface{}) { +func (pp *ParquetParser) setInt64Data(row, col int, val any) { v64, _ := val.(int64) pp.rows[row][col].SetInt64(v64) } -func (pp *ParquetParser) setUint64Data(row, col int, val interface{}) { +func (pp *ParquetParser) setUint64Data(row, col int, val any) { v64, _ := val.(int64) pp.rows[row][col].SetUint64(uint64(v64)) } -func (pp *ParquetParser) setTimeMillisData(row, col int, val interface{}) { +func (pp *ParquetParser) setTimeMillisData(row, col int, val any) { v32, _ := val.(int32) timeStr := formatTime(int64(v32), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimeMicrosData(row, col int, val interface{}) { +func (pp *ParquetParser) setTimeMicrosData(row, col int, val any) { v64, _ := val.(int64) timeStr := formatTime(v64, "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimestampMillisData(row, col int, val interface{}) { +func (pp *ParquetParser) setTimestampMillisData(row, col int, val any) { v64, _ := val.(int64) timeStr := formatTime(v64, "MILLIS", timeLayout, utcTimeLayout, true) pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setTimestampMicrosData(row, col int, val interface{}) { +func (pp *ParquetParser) setTimestampMicrosData(row, col int, val any) { v64, _ := val.(int64) timeStr := formatTime(v64, "MICROS", timeLayout, utcTimeLayout, true) pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") } -func (pp *ParquetParser) setDateData(row, col int, val interface{}) { +func (pp *ParquetParser) 
setDateData(row, col int, val any) { v32, _ := val.(int32) dateStr := time.Unix(int64(v32)*86400, 0).Format(time.DateOnly) pp.rows[row][col].SetString(dateStr, "utf8mb4_bin") } -func (pp *ParquetParser) setDecimalData(row, col int, val interface{}) { +func (pp *ParquetParser) setDecimalData(row, col int, val any) { colTp := pp.dumpers[col].Type() decimal := pp.colMetas[col].decimalMeta @@ -471,7 +471,7 @@ func (pp *ParquetParser) setDecimalData(row, col int, val interface{}) { } } -func (pp *ParquetParser) setBoolData(row, col int, val interface{}) { +func (pp *ParquetParser) setBoolData(row, col int, val any) { boolVal, _ := val.(bool) if boolVal { pp.rows[row][col].SetUint64(1) @@ -480,27 +480,27 @@ func (pp *ParquetParser) setBoolData(row, col int, val interface{}) { pp.rows[row][col].SetUint64(0) } -func (pp *ParquetParser) setFloat32Data(row, col int, val interface{}) { +func (pp *ParquetParser) setFloat32Data(row, col int, val any) { vf32, _ := val.(float32) pp.rows[row][col].SetFloat32(vf32) } -func (pp *ParquetParser) setFloat64Data(row, col int, val interface{}) { +func (pp *ParquetParser) setFloat64Data(row, col int, val any) { vf64, _ := val.(float64) pp.rows[row][col].SetFloat64(vf64) } -func (pp *ParquetParser) setFixedByteArrayData(row, col int, val interface{}) { +func (pp *ParquetParser) setFixedByteArrayData(row, col int, val any) { vfa, _ := val.(parquet.FixedLenByteArray) pp.rows[row][col].SetString(string(vfa), "utf8mb4_bin") } -func (pp *ParquetParser) setByteArrayData(row, col int, val interface{}) { +func (pp *ParquetParser) setByteArrayData(row, col int, val any) { vba, _ := val.(parquet.ByteArray) pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") } -func (pp *ParquetParser) setInt96Data(row, col int, val interface{}) { +func (pp *ParquetParser) setInt96Data(row, col int, val any) { // FYI: 
https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 // INT96 timestamp layout // -------------------------- @@ -605,7 +605,7 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { meta := pp.colMetas[col] physicalTp := dumper.Type() - var setFunc func(row, col int, val interface{}) + var setFunc func(row, col int, val any) if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { switch physicalTp { case parquet.Types.Boolean: From 650f9b888ec1cb77fb914b25f3969afcb2163129 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 16:21:36 +0800 Subject: [PATCH 52/93] update build --- pkg/executor/import_into.go | 4 - pkg/lightning/mydump/allocator.go | 151 ++++++++++++------------- pkg/lightning/mydump/loader_test.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 55 +-------- 4 files changed, 81 insertions(+), 131 deletions(-) diff --git a/pkg/executor/import_into.go b/pkg/executor/import_into.go index 49588a4a1f7f0..1c0fa35f47b31 100644 --- a/pkg/executor/import_into.go +++ b/pkg/executor/import_into.go @@ -330,10 +330,6 @@ func (e *ImportIntoExec) importFromSelect(ctx context.Context) error { return nil } -func (e *ImportIntoExec) Close() error { - return e.BaseExecutor.Close() -} - // ImportIntoActionExec represents a import into action executor. 
type ImportIntoActionExec struct { exec.BaseExecutor diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index a860950399eb1..a46d7e9345b2c 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -20,7 +20,6 @@ import ( "runtime" "runtime/debug" "sync" - "sync/atomic" "unsafe" "github.com/joechenrh/arrow-go/v18/arrow/memory" @@ -30,49 +29,65 @@ import ( "go.uber.org/zap" ) -/* - * There are two usage modes for the memory allocation: - * 1. Call `GetDefaultAllocator` directly to get an allocator. - * 2. Call `InitializeGlobalArena` to initialize the global arena pool, - * so the arena allocated in this node can be reused by subsequent allocation. - * User should remember to call `FreeMemory` after the execution is completed. - */ - var ( - maxArenaCount = 0 // maximum arena count - defaultArenaSize = 256 << 20 // size of each arena + // size of each arena + defaultArenaSize = 256 << 20 - memLimit int // memory limit for parquet reader - memLimiter *membuf.Limiter // memory limiter for parquet reader + // memory limit for parquet reader + readerMemoryLimit int + readerMemoryLimiter *membuf.Limiter - // AllocSize returns actual allocated size in arena + // AllocSize returns actual allocated size from arena AllocSize func(int) int // GetArena creates a new arena GetArena func(int) arena ) -// SetMemoryLimitForParquet set the memory limit for parquet reader and create a global memory pool if necessary. -func SetMemoryLimitForParquet(percent int, useGlobal bool) { +// InitializeGlobalArena initialize a global arena pool. +func InitializeGlobalArena(size int, reuse bool) { + maxArenaCount := size / defaultArenaSize + if globalPool == nil { + globalPool = &arenaPool{ + maxArenaCount: maxArenaCount, + reuse: reuse, + arenas: make(chan arena, 1024), + } + return + } + + globalPool.adjustMaxArenaCount(maxArenaCount) +} + +// FreeMemory free all the memory allocated for arenas. 
+func FreeMemory() { + if globalPool != nil { + globalPool.free() + } +} + +// SetMemoryLimitForParquet set the memory limit for parquet reader. +// If reuse = true, remember to call FreeMemory to free the memory. +func SetMemoryLimitForParquet(percent int, reuse bool) { memTotal, err := tidbmemory.MemTotal() if err != nil { // Set limit to int max, which means no limiter memTotal = math.MaxInt32 } - memLimit = int(memTotal) * min(percent, 90) / 100 - memLimiter = membuf.NewLimiter(memLimit) - InitializeGlobalArena(memLimit, useGlobal) + readerMemoryLimit = int(memTotal) * min(percent, 90) / 100 + readerMemoryLimiter = membuf.NewLimiter(readerMemoryLimit) + InitializeGlobalArena(readerMemoryLimit, reuse) log.L().Info("set memory limit", zap.Int("total memory", int(memTotal)), - zap.Int("memory limit", int(memLimit)), + zap.Int("memory limit", readerMemoryLimit), ) } // GetMemoryQuota get the memory quota for non-streaming mode read. // TODO(joechenrh): set a more proper memory quota func GetMemoryQuota(concurrency int) int { - quotaPerTask := memLimit / concurrency + quotaPerTask := readerMemoryLimit / concurrency // Because other part like encoder also need memory, // we assume that the reader can use up to 80% of the memroy. @@ -86,8 +101,8 @@ func init() { AllocSize = simpleGetAllocationSize GetArena = getSimpleAllocator - // This is used for `IMPORT INTO``. - // We set the default memory usage to 40% and don't use a global arena pool. + // This is used for `IMPORT INTO`. + // We set the default memory usage to 40% and don't reuse arenas. 
SetMemoryLimitForParquet(40, false) } @@ -109,10 +124,11 @@ type arena interface { } type arenaPool struct { - arenas chan arena - allocated int - reuse bool - lock sync.Mutex + arenas chan arena + maxArenaCount int + allocated int + reuse bool + lock sync.Mutex } func (ap *arenaPool) adjustGCPercent() { @@ -123,8 +139,8 @@ func (ap *arenaPool) adjustGCPercent() { debug.SetGCPercent(100) return } - percent := int(memTotal)*90/(ap.allocated*defaultArenaSize) - 100 - percent = min(percent, 50) / 10 * 10 + percent := int(memTotal)*100/(ap.allocated*defaultArenaSize) - 100 + percent = min(percent, 50) percent = max(percent, 5) old := debug.SetGCPercent(percent) @@ -139,6 +155,20 @@ func (ap *arenaPool) adjustGCPercent() { } } +func (ap *arenaPool) adjustMaxArenaCount(newCount int) { + ap.lock.Lock() + defer ap.lock.Unlock() + + ap.maxArenaCount = newCount + for ap.allocated > newCount && len(ap.arenas) > 0 { + a := <-ap.arenas + a.reset() + ap.allocated-- + } + + ap.adjustGCPercent() +} + func (ap *arenaPool) get() arena { // First try to get cached arena select { @@ -151,7 +181,7 @@ func (ap *arenaPool) get() arena { defer ap.lock.Unlock() // Create a new one and return - if ap.allocated < maxArenaCount { + if ap.allocated < ap.maxArenaCount { ap.allocated++ bd := GetArena(defaultArenaSize) ap.adjustGCPercent() @@ -166,19 +196,14 @@ func (ap *arenaPool) put(a arena) { ap.lock.Lock() defer ap.lock.Unlock() - // discard it if necessary - if ap.allocated > maxArenaCount { - a.reset() - ap.adjustGCPercent() + if ap.reuse && ap.allocated <= ap.maxArenaCount { + ap.arenas <- a return } - if ap.reuse { - ap.arenas <- a - } else { - ap.allocated-- - ap.adjustGCPercent() - } + a.reset() + ap.allocated-- + ap.adjustGCPercent() } func (ap *arenaPool) free() { @@ -198,9 +223,6 @@ var globalPool *arenaPool type defaultAllocator struct { arenas []arena allocatedBuf map[uintptr]int - - allocatedOutside atomic.Int64 - allocatedOutsideNum atomic.Int64 } func (alloc *defaultAllocator) 
init() { @@ -215,15 +237,9 @@ func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { } } - // If global pool is initialized, get arena from the pool. - // Otherwise, we just create a new one. - var na arena - if globalPool != nil { - if na = globalPool.get(); na == nil { - return make([]byte, size) - } - } else { - na = GetArena(defaultArenaSize) + na := globalPool.get() + if na == nil { + return make([]byte, size) } buf := na.allocate(size) @@ -233,7 +249,7 @@ func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { } func (alloc *defaultAllocator) Free(buf []byte) { - addr := addressOf(buf[:1]) + addr := addressOf(buf) if arenaID, ok := alloc.allocatedBuf[addr]; ok { alloc.arenas[arenaID].free(buf) delete(alloc.allocatedBuf, addr) @@ -246,36 +262,17 @@ func (alloc *defaultAllocator) Reallocate(size int, buf []byte, tp memory.Buffer } func (alloc *defaultAllocator) Close() { - // If global pool is initialized, return allocated arena to the pool. - if globalPool != nil { - for _, a := range alloc.arenas { - a.reset() - globalPool.put(a) - } + for _, a := range alloc.arenas { + a.reset() + globalPool.put(a) } alloc.arenas = nil } -// GetDefaultAllocator get a default allocator -func GetDefaultAllocator() memory.Allocator { +// GetAllocator get a default allocator +func GetAllocator() memory.Allocator { a := &defaultAllocator{} a.init() return a } - -// InitializeGlobalArena initialize a global arena pool. -// If you call this function, remember to call FreeMemory. -func InitializeGlobalArena(size int, reuse bool) { - maxArenaCount = size / defaultArenaSize - globalPool = &arenaPool{} - globalPool.reuse = reuse - globalPool.arenas = make(chan arena, maxArenaCount) -} - -// FreeMemory free all the memory allocated for arenas. 
-func FreeMemory() { - if globalPool != nil { - globalPool.free() - } -} diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 5473012b2134d..5ad1c3229dcbb 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -1159,7 +1159,7 @@ func testSampleParquetDataSize(t *testing.T, count int) { err = store.WriteFile(ctx, fileName, bf.Bytes()) require.NoError(t, err) - rowSize, _, _, err := md.SampleParquetFileProperty(ctx, md.SourceFileMeta{ + rowSize, _, _, err := md.SampleStatisticsFromParquet(ctx, md.SourceFileMeta{ Path: fileName, }, store) require.NoError(t, err) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 1187884b55e63..b8e6157c475d1 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -15,7 +15,6 @@ package mydump import ( - "bytes" "context" "encoding/base64" "fmt" @@ -41,10 +40,7 @@ import ( const ( defaultBatchSize = 128 - // if a parquet if small than this threshold, parquet will load the whole file in a byte slice to - // optimize the read performance - smallParquetFileThreshold = 256 * 1024 * 1024 - defaultBufSize = 64 * 1024 + defaultBufSize = 64 * 1024 utcTimeLayout = "2006-01-02 15:04:05.999999Z" timeLayout = "2006-01-02 15:04:05.999999" @@ -234,33 +230,6 @@ func formatTime(v int64, unit string, format, utcFormat string, utc bool) string return t.Format(format) } -// bytesReaderWrapper is a wrapper of bytes.Reader. 
-type bytesReaderWrapper struct { - *bytes.Reader - rawBytes []byte - // current file path - path string -} - -func (*bytesReaderWrapper) Close() error { - return nil -} - -func (*bytesReaderWrapper) Write(_ []byte) (n int, err error) { - return 0, errors.New("unsupported operation") -} - -func (r *bytesReaderWrapper) Open(name string) (parquet.ReaderAtSeeker, error) { - if len(name) > 0 && name != r.path { - panic(fmt.Sprintf("Open with a different name is not supported! current: '%s', new: '%s'", r.path, name)) - } - return &bytesReaderWrapper{ - Reader: bytes.NewReader(r.rawBytes), - rawBytes: r.rawBytes, - path: r.path, - }, nil -} - // parquetFileWrapper is a wrapper for storage.ReadSeekCloser // It implements io.ReaderAt interface to read parquet file using arrow-go. type parquetFileWrapper struct { @@ -827,18 +796,6 @@ func OpenParquetReader( path string, size int64, ) (storage.ReadSeekCloser, error) { - if size <= smallParquetFileThreshold { - fileBytes, err := store.ReadFile(ctx, path) - if err != nil { - return nil, err - } - return &bytesReaderWrapper{ - Reader: bytes.NewReader(fileBytes), - rawBytes: fileBytes, - path: path, - }, nil - } - r, err := store.Open(ctx, path, nil) if err != nil { return nil, err @@ -941,14 +898,14 @@ func NewParquetParser( memoryUsage = meta.MemoryUsageStream meta.UseStreaming = true } - memoryUsage = min(memoryUsage, memLimit) - memLimiter.Acquire(memoryUsage) + memoryUsage = min(memoryUsage, readerMemoryLimit) + readerMemoryLimiter.Acquire(memoryUsage) log.FromContext(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory usage full", fmt.Sprintf("%d MB", meta.MemoryUsageFull>>20)), zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryQuota>>20)), - zap.String("memory limit", fmt.Sprintf("%d MB", memLimit>>20)), + zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), zap.Bool("streaming mode", 
meta.UseStreaming), zap.Bool("use sample allocator", meta.UseSampleAllocator), ) @@ -968,7 +925,7 @@ func NewParquetParser( if meta.UseSampleAllocator { allocator = &sampleAllocator{} } else { - alloc := GetDefaultAllocator() + alloc := GetAllocator() allocator = alloc } @@ -1026,7 +983,7 @@ func NewParquetParser( logger: log.FromContext(ctx), base64: meta.Base64, memoryUsage: memoryUsage, - memLimiter: memLimiter, + memLimiter: readerMemoryLimiter, } if err := parser.Init(); err != nil { return nil, errors.Trace(err) From b6681a16a3cadc18dcf7d173f9bba8437f87aec6 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 16:30:57 +0800 Subject: [PATCH 53/93] update build --- pkg/lightning/mydump/parquet_parser.go | 10 +++++++--- pkg/lightning/mydump/parser.go | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index b8e6157c475d1..817b4d92fe4b1 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -38,8 +38,13 @@ import ( ) const ( + // defaultBatchSize is the number of rows fetched each time in the parquet reader defaultBatchSize = 128 + // defaultBufSize specifies the default size of skip buffer. + // Skip buffer is used when reading data from the cloud. If there is a gap between the current + // read position and the last read position, if the gap size is less than the buffer size, + // these data is stored in this buffer to avoid reopening the underlying file. 
defaultBufSize = 64 * 1024 utcTimeLayout = "2006-01-02 15:04:05.999999Z" @@ -61,7 +66,7 @@ type columnDumper struct { valueBuffer any } -func createcolumnDumper(tp parquet.Type) *columnDumper { +func createDumper(tp parquet.Type) *columnDumper { batchSize := 128 var valueBuffer any @@ -500,7 +505,7 @@ func (pp *ParquetParser) Init() error { pp.dumpers = make([]*columnDumper, numCols) for i := 0; i < numCols; i++ { - pp.dumpers[i] = createcolumnDumper(meta.Schema.Column(i).PhysicalType()) + pp.dumpers[i] = createDumper(meta.Schema.Column(i).PhysicalType()) } return nil @@ -794,7 +799,6 @@ func OpenParquetReader( ctx context.Context, store storage.ExternalStorage, path string, - size int64, ) (storage.ReadSeekCloser, error) { r, err := store.Open(ctx, path, nil) if err != nil { diff --git a/pkg/lightning/mydump/parser.go b/pkg/lightning/mydump/parser.go index a47ee4ba76ff0..676183dd5278c 100644 --- a/pkg/lightning/mydump/parser.go +++ b/pkg/lightning/mydump/parser.go @@ -679,7 +679,7 @@ func OpenReader( ) (reader storage.ReadSeekCloser, err error) { switch { case fileMeta.Type == SourceTypeParquet: - reader, err = OpenParquetReader(ctx, store, fileMeta.Path, fileMeta.FileSize) + reader, err = OpenParquetReader(ctx, store, fileMeta.Path) case fileMeta.Compression != CompressionNone: compressType, err2 := ToStorageCompressType(fileMeta.Compression) if err2 != nil { From 016d1e5d5ad04617e627f1651101381bc9c4cb08 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 17:03:22 +0800 Subject: [PATCH 54/93] fix test --- go.sum | 2 -- pkg/lightning/mydump/parquet_parser.go | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/go.sum b/go.sum index 8fd33cb337d08..a87636113ec1f 100644 --- a/go.sum +++ b/go.sum @@ -516,8 +516,6 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9 h1:LJGbjOFBrjYubt498ycNLCkXth989t1N9LjWdGuD36U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250107060625-e99480fe0ed9/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 h1:8QBwC5DOnNBqsXPpeGqD79FcYNTqVR6wDeczNpHLBpA= github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 817b4d92fe4b1..49f2c3e137da0 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -843,17 +843,17 @@ type sampleAllocator struct { } func (sa *sampleAllocator) Allocate(size int, tp memory.BufferType) []byte { - size = AllocSize(size) + allocSize := AllocSize(size) switch tp { case memory.BufferCompressed: - sa.maxCompressedLength = max(sa.maxCompressedLength, size) + sa.maxCompressedLength = max(sa.maxCompressedLength, allocSize) case memory.BufferDataPage: - sa.maxDataPage = max(sa.maxDataPage, size) + sa.maxDataPage = max(sa.maxDataPage, allocSize) case memory.BufferDictionary: // For each row group, we need to store all dictionary pages to decode data page. 
- sa.totalDictPage += size + sa.totalDictPage += allocSize default: - sa.otherAllocated += size + sa.otherAllocated += allocSize } return make([]byte, size) } From 5a02500810295d48ded9ced56301e955551d096e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 18 Feb 2025 17:26:50 +0800 Subject: [PATCH 55/93] fix test --- lightning/pkg/importer/table_import_test.go | 36 --------------------- 1 file changed, 36 deletions(-) diff --git a/lightning/pkg/importer/table_import_test.go b/lightning/pkg/importer/table_import_test.go index 150241a3d1030..901e59a1c1b24 100644 --- a/lightning/pkg/importer/table_import_test.go +++ b/lightning/pkg/importer/table_import_test.go @@ -2397,39 +2397,3 @@ func TestGetDDLStatus(t *testing.T) { require.Equal(t, model.JobStateRunning, status.state) require.Equal(t, int64(123)+int64(456), status.rowCount) } - -func TestGetChunkCompressedSizeForParquet(t *testing.T) { - dir := "./testdata/" - fileName := "000000_0.parquet" - store, err := storage.NewLocalStorage(dir) - require.NoError(t, err) - - dataFiles := make([]mydump.FileInfo, 0) - dataFiles = append(dataFiles, mydump.FileInfo{ - TableName: filter.Table{Schema: "db", Name: "table"}, - FileMeta: mydump.SourceFileMeta{ - Path: fileName, - Type: mydump.SourceTypeParquet, - Compression: mydump.CompressionNone, - SortKey: "99", - FileSize: 192, - }, - }) - - chunk := checkpoints.ChunkCheckpoint{ - Key: checkpoints.ChunkCheckpointKey{Path: dataFiles[0].FileMeta.Path, Offset: 0}, - FileMeta: dataFiles[0].FileMeta, - Chunk: mydump.Chunk{ - Offset: 0, - EndOffset: 192, - PrevRowIDMax: 0, - RowIDMax: 100, - }, - } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - compressedSize, err := getChunkCompressedSizeForParquet(ctx, &chunk, store) - require.NoError(t, err) - require.Equal(t, compressedSize, int64(192)) -} From 9f7481803c075df94e6072ed08b3286f0a9cc15c Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 19 Feb 2025 15:34:55 +0800 Subject: [PATCH 56/93] update 
allocator and comments --- pkg/lightning/mydump/allocator_test.go | 7 +- pkg/lightning/mydump/parquet_parser.go | 64 ++++++++++-------- pkg/lightning/mydump/simple_allocator.go | 85 +++++++++++++++--------- 3 files changed, 91 insertions(+), 65 deletions(-) diff --git a/pkg/lightning/mydump/allocator_test.go b/pkg/lightning/mydump/allocator_test.go index a7e0bc08a2c6a..1c5f14b4c154e 100644 --- a/pkg/lightning/mydump/allocator_test.go +++ b/pkg/lightning/mydump/allocator_test.go @@ -24,9 +24,7 @@ import ( ) func TestSimpleAllocator(t *testing.T) { - alignSize = 1 << 10 - - totalSize := 1 << 20 + totalSize := 16 << 20 a := simpleAllocator{} a.init(totalSize) @@ -35,7 +33,7 @@ func TestSimpleAllocator(t *testing.T) { wg sync.WaitGroup ) - allocSize := []int{1 << 10, 2 << 10, 4 << 10, 8 << 10, 16 << 10, 32 << 10, 64 << 10, 128 << 10} + allocSize := []int{16 << 10, 32 << 10, 64 << 10, 128 << 10, 256 << 10, 512 << 10} ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) defer cancel() @@ -52,6 +50,7 @@ func TestSimpleAllocator(t *testing.T) { buf := a.allocate(bufSize) lk.Unlock() + // hold for sometimes time.Sleep(time.Millisecond) lk.Lock() diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 49f2c3e137da0..39fccdb473d47 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -43,14 +43,15 @@ const ( // defaultBufSize specifies the default size of skip buffer. // Skip buffer is used when reading data from the cloud. If there is a gap between the current - // read position and the last read position, if the gap size is less than the buffer size, - // these data is stored in this buffer to avoid reopening the underlying file. + // read position and the last read position, these data is stored in this buffer to avoid + // potentially reopening the underlying file when the gap size is less than the buffer size. 
defaultBufSize = 64 * 1024 utcTimeLayout = "2006-01-02 15:04:05.999999Z" timeLayout = "2006-01-02 15:04:05.999999" ) +// columnDumper is a helper struct to read data from one column. type columnDumper struct { reader file.ColumnChunkReader batchSize int64 @@ -242,17 +243,15 @@ type parquetFileWrapper struct { storage.ReadSeekCloser lastOff int64 - bufSize int - buf []byte + skipBuf []byte // current file path and store, used to open file store storage.ExternalStorage path string } -func (pf *parquetFileWrapper) InitBuffer(bufSize int) { - pf.bufSize = bufSize - pf.buf = make([]byte, bufSize) +func (pf *parquetFileWrapper) Init(bufSize int) { + pf.skipBuf = make([]byte, bufSize) } func (pf *parquetFileWrapper) readNBytes(p []byte) (int, error) { @@ -271,13 +270,13 @@ func (pf *parquetFileWrapper) ReadAt(p []byte, off int64) (int, error) { // We want to minimize the number of Seek call as much as possible, // since the underlying reader may require reopening the file. gap := int(off - pf.lastOff) - if gap < 0 || gap > pf.bufSize { + if gap < 0 || gap > cap(pf.skipBuf) { if _, err := pf.Seek(off, io.SeekStart); err != nil { return 0, err } } else { - pf.buf = pf.buf[:gap] - if read, err := pf.readNBytes(pf.buf); err != nil { + pf.skipBuf = pf.skipBuf[:gap] + if read, err := pf.readNBytes(pf.skipBuf); err != nil { return read, err } } @@ -317,7 +316,7 @@ func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) ctx: pf.ctx, path: name, } - newPf.InitBuffer(defaultBufSize) + newPf.Init(defaultBufSize) return newPf, nil } @@ -330,10 +329,10 @@ type ParquetParser struct { alloc memory.Allocator - // colBuffers is used to store raw data read from parquet columns. - // rows stores the actual data after parsing. dumpers []*columnDumper - rows [][]types.Datum + + // rows stores the actual data after parsing. 
+ rows [][]types.Datum // curIdx and avail is the current index and total number of rows in rows buffer curIdx int @@ -346,6 +345,7 @@ type ParquetParser struct { totalRowsInGroup int // total rows in current group curRows int // number of rows read in total totalRows int // total rows in this file + totalBytesRead int // total bytes read, estimated by all the read datum. lastRow Row logger log.Logger @@ -560,6 +560,10 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { pp.curRowInGroup += curRead } + for i := 0; i < readNum; i++ { + pp.totalBytesRead += estimateRowSize(pp.rows[i]) + } + pp.curRows += readNum pp.curIdx, pp.avail = 0, readNum return readNum, nil @@ -683,10 +687,9 @@ func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { } // ScannedPos implements the Parser interface. -// For parquet it's nonsense to get the position of internal reader, -// thus it will return the number of rows read. +// For parquet we use the size of all read datum to estimate the scanned positon. func (pp *ParquetParser) ScannedPos() (int64, error) { - return int64(pp.curRows), nil + return int64(pp.totalBytesRead), nil } // Close closes the parquet file of the parser. @@ -713,7 +716,7 @@ func (pp *ParquetParser) Close() error { } // GetRow get the the current row. -// Return error if can't read next row. +// Return error if we can't read next row. // User should call ReadRow before calling this. func (pp *ParquetParser) GetRow() ([]types.Datum, error) { if pp.curIdx >= pp.avail { @@ -749,21 +752,26 @@ func (pp *ParquetParser) ReadRow() error { return nil } -// LastRow gets the last row parsed by the parser. -// It implements the Parser interface. 
-func (pp *ParquetParser) LastRow() Row { - pp.lastRow.Length = 0 - for _, v := range pp.lastRow.Row { +func estimateRowSize(row []types.Datum) int { + length := 0 + for _, v := range row { if v.IsNull() { continue } if v.Kind() == types.KindString { // use GetBytes to avoid memory allocation - pp.lastRow.Length += len(v.GetBytes()) + length += len(v.GetBytes()) } else { - pp.lastRow.Length += 8 + length += 8 } } + return length +} + +// LastRow gets the last row parsed by the parser. +// It implements the Parser interface. +func (pp *ParquetParser) LastRow() Row { + pp.lastRow.Length = estimateRowSize(pp.lastRow.Row) return pp.lastRow } @@ -811,7 +819,7 @@ func OpenParquetReader( ctx: ctx, path: path, } - pf.InitBuffer(defaultBufSize) + pf.Init(defaultBufSize) return pf, nil } @@ -922,7 +930,7 @@ func NewParquetParser( ctx: ctx, path: path, } - wrapper.InitBuffer(defaultBufSize) + wrapper.Init(defaultBufSize) } var allocator memory.Allocator @@ -1018,7 +1026,7 @@ func SampleStatisticsFromParquet( ctx: ctx, path: fileMeta.Path, } - wrapper.InitBuffer(defaultBufSize) + wrapper.Init(defaultBufSize) prop := parquet.NewReaderProperties(nil) prop.BufferedStreamEnabled = true diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 4a4bc12d0a133..a562f3003b142 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -21,27 +21,19 @@ import ( ) const ( + // Size of metadata of each block metaSize = 64 invalid = math.MaxInt32 -) -// This value will be modifed in test -var alignSize = 16 << 10 + // The allocated memory size will be aligned to the nearest multiple of alignSize. 
+ // This value will be modifed in test + alignSize = 16 << 10 +) func roundUp(n, sz int) int { return (n + sz - 1) / sz * sz } -func simpleGetAllocationSize(size int) int { - return roundUp(size+metaSize, alignSize) * 2 -} - -func getSimpleAllocator(size int) arena { - a := &simpleAllocator{} - a.init(size) - return a -} - func storeInt(value int, buf []byte) { buf[0] = byte(value >> 24) buf[1] = byte(value >> 16) @@ -53,12 +45,40 @@ func readInt(buf []byte) int { return int(buf[0])<<24 | int(buf[1])<<16 | int(buf[2])<<8 | int(buf[3]) } +func simpleGetAllocationSize(size int) int { + return roundUp(size+metaSize, alignSize) * 2 +} + +/* +simpleAllocator is a very simple allocator with low allocation efficiency +which manages allocated memory using a linked list structure. + +It is used in parquet reader and it's sufficient for our scenario +as memory allocation will not be a bottleneck. + +The memory layout is as follows: + + --------------------| + | v + ------------------------------------------------------------------------- + | | s | p | n | xxxx | | s | p | n | xxxx | | + ------------------------------------------------------------------------- + ^ | + |___________________________________| +*/ type simpleAllocator struct { - buf []byte - base int - numAlloc int - firstFree int - alloc int + buf []byte + base int + + // Number of blocks and bytes allocated + blocksAlloc int + bytesAloc int +} + +func getSimpleAllocator(size int) arena { + a := &simpleAllocator{} + a.init(size) + return a } func (sa *simpleAllocator) init(bufSize int) { @@ -91,23 +111,23 @@ func (sa *simpleAllocator) getBlk(offset int) (prev, next, blkSize int) { } func (sa *simpleAllocator) insertFree(free int) { - for offset := sa.firstFree; offset != invalid; { + for offset := 0; offset != invalid; { if free > offset { _, _, blkSize := sa.getBlk(free) _, next, _ := sa.getBlk(offset) sa.setBlk(offset, -1, free, -1) sa.setBlk(free, offset, next, -1) sa.setBlk(next, free, -1, -1) - sa.alloc -= 
blkSize + sa.bytesAloc -= blkSize return } } panic("Error insertFree") } -// Merge adjacent free blocks into one big free block +// Merge adjacent free blocks into one big free block to reduce fragmentation. func (sa *simpleAllocator) merge() { - for offset := sa.firstFree; offset != invalid; { + for offset := 0; offset != invalid; { _, next, blkSize := sa.getBlk(offset) if offset+blkSize == next { _, nextnext, nextBlkSize := sa.getBlk(next) @@ -127,7 +147,7 @@ func (sa *simpleAllocator) allocate(size int) []byte { bestOffset := -1 minRemain := math.MaxInt32 - for offset := sa.firstFree; offset != invalid; { + for offset := 0; offset != invalid; { _, next, blkSize := sa.getBlk(offset) if offset+blkSize >= len(sa.buf) { panic("Error blk size") @@ -154,8 +174,8 @@ func (sa *simpleAllocator) allocate(size int) []byte { sa.setBlk(bestOffset, -1, -1, minRemain) } - sa.numAlloc++ - sa.alloc += allocSize + sa.blocksAlloc++ + sa.bytesAloc += allocSize bufStart := bestOffset + minRemain sa.setBlk(bufStart, -1, -1, allocSize) sa.sanityCheck() @@ -168,8 +188,8 @@ func (sa *simpleAllocator) free(buf []byte) { return } - sa.numAlloc-- - if sa.numAlloc == 0 { + sa.blocksAlloc-- + if sa.blocksAlloc == 0 { sa.reset() return } @@ -184,7 +204,7 @@ func (sa *simpleAllocator) reallocate(buf []byte, size int) []byte { } func (sa *simpleAllocator) allocated() int64 { - return int64(sa.numAlloc) + return int64(sa.blocksAlloc) } func (sa *simpleAllocator) sanityCheck() { @@ -192,8 +212,8 @@ func (sa *simpleAllocator) sanityCheck() { return } - mem := sa.alloc - for offset := sa.firstFree; offset != invalid; { + mem := sa.bytesAloc + for offset := 0; offset != invalid; { _, next, blkSize := sa.getBlk(offset) mem += blkSize offset = next @@ -204,12 +224,11 @@ func (sa *simpleAllocator) sanityCheck() { } func (sa *simpleAllocator) reset() { - sa.alloc = 0 + sa.bytesAloc = 0 - // Add dummy head and tail + // Add dummy head and tail block to simplify the allocation logic total := len(sa.buf) 
sa.setBlk(0, invalid, alignSize, 0) sa.setBlk(alignSize, 0, total-alignSize, total-alignSize*3) sa.setBlk(total-alignSize, alignSize, invalid, 0) - sa.firstFree = 0 } From 30e4d321fc950257f395a52276fad194f2ed2947 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 20 Feb 2025 09:55:05 +0800 Subject: [PATCH 57/93] revert for test build --- pkg/disttask/importinto/encode_and_sort_operator.go | 6 +++--- pkg/lightning/backend/external/merge.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 6b3d43510a89a..177a01bb44f5f 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -234,9 +234,9 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) // For parquet file format, we allocate 50% of the memory to file reader. - if plan.Format == "parquet" { - memPerCon /= 2 - } + // if plan.Format == "parquet" { + // memPerCon /= 2 + // } // we use half of the total available memory for data writer, and the other half // for encoding and other stuffs, it's an experience value, might not optimal. // Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer diff --git a/pkg/lightning/backend/external/merge.go b/pkg/lightning/backend/external/merge.go index cc0035c186811..503b037d542a6 100644 --- a/pkg/lightning/backend/external/merge.go +++ b/pkg/lightning/backend/external/merge.go @@ -30,7 +30,7 @@ var ( // MaxMergingFilesPerThread is the maximum number of files that can be merged by a // single thread. This value comes from the fact that 16 threads are ok to merge 4k // files in parallel, so we set it to 250. 
- MaxMergingFilesPerThread = 120 + MaxMergingFilesPerThread = 250 // MinUploadPartSize is the minimum size of each part when uploading files to // external storage, which is 5MiB for both S3 and GCS. MinUploadPartSize int64 = 5 * units.MiB From 9b324ee26960c3027c59f0be507db9dacacd62c1 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 20 Feb 2025 09:57:00 +0800 Subject: [PATCH 58/93] revert for test build --- pkg/disttask/importinto/encode_and_sort_operator.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 177a01bb44f5f..6b3d43510a89a 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -234,9 +234,9 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) // For parquet file format, we allocate 50% of the memory to file reader. - // if plan.Format == "parquet" { - // memPerCon /= 2 - // } + if plan.Format == "parquet" { + memPerCon /= 2 + } // we use half of the total available memory for data writer, and the other half // for encoding and other stuffs, it's an experience value, might not optimal. 
// Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer From 1cf646647799d10ed067e5580cf35e9d4f7fd8dd Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 20 Feb 2025 16:30:43 +0800 Subject: [PATCH 59/93] update allocator and GC --- pkg/disttask/importinto/task_executor.go | 11 ++ pkg/lightning/mydump/allocator.go | 186 ++++++----------------- pkg/lightning/mydump/simple_allocator.go | 23 +-- 3 files changed, 70 insertions(+), 150 deletions(-) diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index ccaac310cf4f9..4d3ea780f4601 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -39,6 +39,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/config" "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/lightning/metric" + "github.com/pingcap/tidb/pkg/lightning/mydump" "github.com/pingcap/tidb/pkg/lightning/verification" "github.com/pingcap/tidb/pkg/meta/autoid" "github.com/pingcap/tidb/pkg/table/tables" @@ -96,6 +97,12 @@ func getTableImporter( func (s *importStepExecutor) Init(ctx context.Context) error { s.logger.Info("init subtask env") + + if s.taskMeta.Plan.Format == importer.DataFormatParquet { + // For `IMPORT INTO format "parquet"`, we set the memory usage for parquet reader to 40%. 
+ mydump.SetMemoryLimitForParquet(40) + } + tableImporter, err := getTableImporter(ctx, s.taskID, s.taskMeta, s.store) if err != nil { return err @@ -272,6 +279,10 @@ func (s *importStepExecutor) onFinished(ctx context.Context, subtask *proto.Subt } func (s *importStepExecutor) Cleanup(_ context.Context) (err error) { + if s.taskMeta.Plan.Format == importer.DataFormatParquet { + mydump.FreeMemoryForParquet() + } + s.logger.Info("cleanup subtask env") s.importCancel() s.wg.Wait() diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index a46d7e9345b2c..6e9e37efd46c3 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -16,8 +16,6 @@ package mydump import ( "math" - "os" - "runtime" "runtime/debug" "sync" "unsafe" @@ -37,38 +35,32 @@ var ( readerMemoryLimit int readerMemoryLimiter *membuf.Limiter + // globalPool is used for all parquet import tasks. + // We use importCount to track and release memory. + lk sync.Mutex + globalPool *membuf.Pool + importCount int + // AllocSize returns actual allocated size from arena AllocSize func(int) int // GetArena creates a new arena - GetArena func(int) arena + GetArena func(*membuf.Buffer) arena ) -// InitializeGlobalArena initialize a global arena pool. -func InitializeGlobalArena(size int, reuse bool) { - maxArenaCount := size / defaultArenaSize - if globalPool == nil { - globalPool = &arenaPool{ - maxArenaCount: maxArenaCount, - reuse: reuse, - arenas: make(chan arena, 1024), - } +// SetMemoryLimitForParquet set the memory limit for parquet reader. +// If reuse = true, remember to call FreeMemory to free the memory. +func SetMemoryLimitForParquet(percent int) { + lk.Lock() + defer lk.Unlock() + + importCount++ + if importCount > 1 { return } - globalPool.adjustMaxArenaCount(maxArenaCount) -} + debug.SetGCPercent(50) -// FreeMemory free all the memory allocated for arenas. 
-func FreeMemory() { - if globalPool != nil { - globalPool.free() - } -} - -// SetMemoryLimitForParquet set the memory limit for parquet reader. -// If reuse = true, remember to call FreeMemory to free the memory. -func SetMemoryLimitForParquet(percent int, reuse bool) { memTotal, err := tidbmemory.MemTotal() if err != nil { // Set limit to int max, which means no limiter @@ -76,7 +68,12 @@ func SetMemoryLimitForParquet(percent int, reuse bool) { } readerMemoryLimit = int(memTotal) * min(percent, 90) / 100 readerMemoryLimiter = membuf.NewLimiter(readerMemoryLimit) - InitializeGlobalArena(readerMemoryLimit, reuse) + + globalPool = membuf.NewPool( + // membuf.WithAllocator(manual.Allocator{}), + membuf.WithBlockNum(readerMemoryLimit/defaultArenaSize), + membuf.WithBlockSize(defaultArenaSize), + ) log.L().Info("set memory limit", zap.Int("total memory", int(memTotal)), @@ -84,6 +81,17 @@ func SetMemoryLimitForParquet(percent int, reuse bool) { ) } +func FreeMemoryForParquet() { + lk.Lock() + defer lk.Unlock() + + importCount-- + if importCount == 0 { + globalPool.Destroy() + debug.SetGCPercent(100) + } +} + // GetMemoryQuota get the memory quota for non-streaming mode read. // TODO(joechenrh): set a more proper memory quota func GetMemoryQuota(concurrency int) int { @@ -100,10 +108,6 @@ func GetMemoryQuota(concurrency int) int { func init() { AllocSize = simpleGetAllocationSize GetArena = getSimpleAllocator - - // This is used for `IMPORT INTO`. - // We set the default memory usage to 40% and don't reuse arenas. 
- SetMemoryLimitForParquet(40, false) } // Get the address of a buffer, return 0 if the buffer is nil @@ -123,110 +127,11 @@ type arena interface { reset() } -type arenaPool struct { - arenas chan arena - maxArenaCount int - allocated int - reuse bool - lock sync.Mutex -} - -func (ap *arenaPool) adjustGCPercent() { - gogc := os.Getenv("GOGC") - memTotal, err := tidbmemory.MemTotal() - if gogc == "" && err == nil { - if ap.allocated == 0 { - debug.SetGCPercent(100) - return - } - percent := int(memTotal)*100/(ap.allocated*defaultArenaSize) - 100 - percent = min(percent, 50) - percent = max(percent, 5) - - old := debug.SetGCPercent(percent) - //nolint: all_revive,revive - runtime.GC() - log.L().Debug("set gc percentage", - zap.Int("old", old), - zap.Int("new", percent), - zap.Int("total memory", int(memTotal)), - zap.Int("allocated memory", ap.allocated*defaultArenaSize), - ) - } -} - -func (ap *arenaPool) adjustMaxArenaCount(newCount int) { - ap.lock.Lock() - defer ap.lock.Unlock() - - ap.maxArenaCount = newCount - for ap.allocated > newCount && len(ap.arenas) > 0 { - a := <-ap.arenas - a.reset() - ap.allocated-- - } - - ap.adjustGCPercent() -} - -func (ap *arenaPool) get() arena { - // First try to get cached arena - select { - case a := <-ap.arenas: - return a - default: - } - - ap.lock.Lock() - defer ap.lock.Unlock() - - // Create a new one and return - if ap.allocated < ap.maxArenaCount { - ap.allocated++ - bd := GetArena(defaultArenaSize) - ap.adjustGCPercent() - return bd - } - - // We can't create new arena, return nil - return nil -} - -func (ap *arenaPool) put(a arena) { - ap.lock.Lock() - defer ap.lock.Unlock() - - if ap.reuse && ap.allocated <= ap.maxArenaCount { - ap.arenas <- a - return - } - - a.reset() - ap.allocated-- - ap.adjustGCPercent() -} - -func (ap *arenaPool) free() { - ap.lock.Lock() - defer ap.lock.Unlock() - - ap.allocated = 0 - for len(ap.arenas) > 0 { - a := <-ap.arenas - a.reset() - } - ap.adjustGCPercent() -} - -var globalPool 
*arenaPool - type defaultAllocator struct { - arenas []arena - allocatedBuf map[uintptr]int -} + arenas []arena + mbufs []*membuf.Buffer -func (alloc *defaultAllocator) init() { - alloc.allocatedBuf = make(map[uintptr]int, 8) + allocatedBuf map[uintptr]int } func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { @@ -236,12 +141,10 @@ func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { return buf } } + mbuf := globalPool.NewBuffer() + alloc.mbufs = append(alloc.mbufs, mbuf) - na := globalPool.get() - if na == nil { - return make([]byte, size) - } - + na := GetArena(mbuf) buf := na.allocate(size) alloc.allocatedBuf[addressOf(buf)] = len(alloc.arenas) alloc.arenas = append(alloc.arenas, na) @@ -264,15 +167,16 @@ func (alloc *defaultAllocator) Reallocate(size int, buf []byte, tp memory.Buffer func (alloc *defaultAllocator) Close() { for _, a := range alloc.arenas { a.reset() - globalPool.put(a) } - + for _, mbuf := range alloc.mbufs { + mbuf.Destroy() + } alloc.arenas = nil } // GetAllocator get a default allocator func GetAllocator() memory.Allocator { - a := &defaultAllocator{} - a.init() - return a + return &defaultAllocator{ + allocatedBuf: make(map[uintptr]int, 32), + } } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index a562f3003b142..482b62babb8fc 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -17,6 +17,7 @@ package mydump import ( "math" + "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/util/intest" ) @@ -75,16 +76,20 @@ type simpleAllocator struct { bytesAloc int } -func getSimpleAllocator(size int) arena { - a := &simpleAllocator{} - a.init(size) - return a -} +func getSimpleAllocator(mbuf *membuf.Buffer) arena { + var buf []byte + if mbuf != nil { + buf = mbuf.AllocBytes(defaultArenaSize) + } else { + buf = make([]byte, defaultArenaSize) + } -func (sa *simpleAllocator) 
init(bufSize int) { - sa.buf = make([]byte, bufSize) - sa.base = int(addressOf(sa.buf)) - sa.reset() + a := &simpleAllocator{ + buf: buf, + base: int(addressOf(buf)), + } + a.reset() + return a } func (sa *simpleAllocator) getOffset(buf []byte) int { From 577812c9c5dfe80280d04f4c3cb8706dce70879c Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 20 Feb 2025 17:05:02 +0800 Subject: [PATCH 60/93] fix build --- pkg/executor/importer/import.go | 3 +-- pkg/lightning/mydump/allocator.go | 4 +--- pkg/lightning/mydump/parquet_parser.go | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index fc5aa6c344810..90eb70d321914 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1177,11 +1177,10 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } for _, dataFile := range dataFiles { // To reduce the memory usage, we only use streaming mode to read file. - // TODO(joechenrh): set a more proper memory quota dataFile.ParquetMeta = mydump.ParquetFileMeta{ MemoryUsageStream: memoryUsage, MemoryUsageFull: memoryUsageFull, - MemoryQuota: mydump.GetMemoryQuota(runtime.NumCPU()), + MemoryQuota: mydump.GetMemoryQuota(e.ThreadCnt), UseStreaming: true, UseSampleAllocator: false, Base64: e.FieldsEncodedBy == config.FieldEncodeBase64, diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 6e9e37efd46c3..f028720e11169 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -93,13 +93,11 @@ func FreeMemoryForParquet() { } // GetMemoryQuota get the memory quota for non-streaming mode read. 
-// TODO(joechenrh): set a more proper memory quota func GetMemoryQuota(concurrency int) int { quotaPerTask := readerMemoryLimit / concurrency - // Because other part like encoder also need memory, + // Because other parts like encoder also consume memory, // we assume that the reader can use up to 80% of the memroy. - // Maybe we can have a more accurate estimation later. quotaPerReader := quotaPerTask * 8 / 10 quotaPerReader = quotaPerReader / defaultArenaSize * defaultArenaSize return quotaPerReader diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 39fccdb473d47..2968d1f51f3a5 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -687,7 +687,7 @@ func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { } // ScannedPos implements the Parser interface. -// For parquet we use the size of all read datum to estimate the scanned positon. +// For parquet we use the size of all read datum to estimate the scanned position. func (pp *ParquetParser) ScannedPos() (int64, error) { return int64(pp.totalBytesRead), nil } From 1b8bf906f20360f8fd98010ad8fdf9d9df95f7fb Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 20 Feb 2025 18:31:38 +0800 Subject: [PATCH 61/93] update --- pkg/lightning/mydump/parquet_parser.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 2968d1f51f3a5..c56582a921514 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -23,6 +23,7 @@ import ( "math/big" "reflect" "strings" + "sync/atomic" "time" "github.com/joechenrh/arrow-go/v18/arrow/memory" @@ -51,6 +52,8 @@ const ( timeLayout = "2006-01-02 15:04:05.999999" ) +var openedParser atomic.Int32 + // columnDumper is a helper struct to read data from one column. 
type columnDumper struct { reader file.ColumnChunkReader @@ -703,6 +706,8 @@ func (pp *ParquetParser) Close() error { if pp.memLimiter != nil { pp.memLimiter.Release(pp.memoryUsage) } + + openedParser.Add(-1) }() pp.resetReader() @@ -918,6 +923,7 @@ func NewParquetParser( zap.String("memory usage full", fmt.Sprintf("%d MB", meta.MemoryUsageFull>>20)), zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryQuota>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), + zap.Int32("opened parser", openedParser.Add(1)), zap.Bool("streaming mode", meta.UseStreaming), zap.Bool("use sample allocator", meta.UseSampleAllocator), ) From 1fffb007f7476e1725676b16bd1a0b7b1e122ed2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 21 Feb 2025 17:08:49 +0800 Subject: [PATCH 62/93] remove some unused code --- lightning/pkg/importer/import.go | 2 +- pkg/executor/importer/import.go | 5 +++-- pkg/lightning/mydump/BUILD.bazel | 1 + pkg/lightning/mydump/allocator.go | 15 +++++++++------ pkg/lightning/mydump/simple_allocator.go | 4 ---- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index 5b5f033742f90..9b7c42030e162 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -1545,7 +1545,7 @@ func (rc *Controller) importTables(ctx context.Context) (finalErr error) { // All tables are read, we can free memory used for parquet. 
logTask.Info("Read table done, free memory and call GC") - mydump.FreeMemory() + mydump.FreeMemoryForParquet() postProgress = func() error { close(postProcessTaskChan) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 90eb70d321914..5453ca7bf5bed 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1170,7 +1170,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } // Fill memory usage info - if sourceType == mydump.SourceTypeParquet { + if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { _, memoryUsage, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) if err != nil { return errors.Trace(err) @@ -1187,7 +1187,8 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } } - // TODO(joechnerh): maybe we can adjust thread count for parquet here + // TODO(joechnerh): Maybe we can adjust thread count for parquet here, + // when we support global sort using thread < 8. 
} e.dataFiles = dataFiles diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 7457e881d5176..f4124976b9469 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -51,6 +51,7 @@ go_library( "@com_github_joechenrh_arrow_go_v18//parquet/schema", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_log//:log", "@com_github_spkg_bom//:bom", "@org_golang_x_sync//errgroup", "@org_golang_x_text//encoding", diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index f028720e11169..f4222ee4615c6 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -21,7 +21,7 @@ import ( "unsafe" "github.com/joechenrh/arrow-go/v18/arrow/memory" - "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/log" "github.com/pingcap/tidb/pkg/lightning/membuf" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" "go.uber.org/zap" @@ -49,7 +49,7 @@ var ( ) // SetMemoryLimitForParquet set the memory limit for parquet reader. -// If reuse = true, remember to call FreeMemory to free the memory. +// Remember to call FreeMemoryForParquet to free the memory. 
func SetMemoryLimitForParquet(percent int) { lk.Lock() defer lk.Unlock() @@ -59,18 +59,21 @@ func SetMemoryLimitForParquet(percent int) { return } - debug.SetGCPercent(50) - memTotal, err := tidbmemory.MemTotal() if err != nil { + log.L().Warn("Fail to get total memory") // Set limit to int max, which means no limiter memTotal = math.MaxInt32 } readerMemoryLimit = int(memTotal) * min(percent, 90) / 100 readerMemoryLimiter = membuf.NewLimiter(readerMemoryLimit) + gcPercent := (10000/percent - 100) / 10 * 10 + gcPercent = max(gcPercent, 10) + gcPercent = min(gcPercent, 50) + debug.SetGCPercent(gcPercent) + globalPool = membuf.NewPool( - // membuf.WithAllocator(manual.Allocator{}), membuf.WithBlockNum(readerMemoryLimit/defaultArenaSize), membuf.WithBlockSize(defaultArenaSize), ) @@ -78,6 +81,7 @@ func SetMemoryLimitForParquet(percent int) { log.L().Info("set memory limit", zap.Int("total memory", int(memTotal)), zap.Int("memory limit", readerMemoryLimit), + zap.Int("GC Percentage", gcPercent), ) } @@ -121,7 +125,6 @@ func addressOf(buf []byte) uintptr { type arena interface { allocate(int) []byte free([]byte) - allocated() int64 reset() } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 482b62babb8fc..9727918953bc7 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -208,10 +208,6 @@ func (sa *simpleAllocator) reallocate(buf []byte, size int) []byte { return sa.allocate(size) } -func (sa *simpleAllocator) allocated() int64 { - return int64(sa.blocksAlloc) -} - func (sa *simpleAllocator) sanityCheck() { if !intest.InTest { return From fdf45781037408c264a19f632c9bf0154871a29e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 26 Feb 2025 14:58:13 +0800 Subject: [PATCH 63/93] clean up --- pkg/disttask/importinto/task_executor.go | 2 -- pkg/executor/importer/import.go | 29 ------------------------ pkg/lightning/config/config.go | 11 --------- 
pkg/lightning/mydump/loader.go | 1 - pkg/lightning/mydump/parquet_parser.go | 13 ----------- 5 files changed, 56 deletions(-) diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index 4d3ea780f4601..d7ed67e4325d7 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -316,7 +316,6 @@ func (m *mergeSortStepExecutor) Init(ctx context.Context) error { } m.controller = controller dataKVMemSizePerCon, perIndexKVMemSizePerCon := getWriterMemorySizeLimit(m.GetResource(), &m.taskMeta.Plan) - // TODO(joechenrh): set MaxMergingFilesPerThread here? m.dataKVPartSize = max(external.MinUploadPartSize, int64(dataKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) m.indexKVPartSize = max(external.MinUploadPartSize, int64(perIndexKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) @@ -353,7 +352,6 @@ func (m *mergeSortStepExecutor) RunSubtask(ctx context.Context, subtask *proto.S if sm.KVGroup != dataKVGroup { partSize = m.indexKVPartSize } - // TODO(joechenrh): set MaxMergingFilesPerThread here? 
err = external.MergeOverlappingFiles( logutil.WithFields(ctx, zap.String("kv-group", sm.KVGroup), zap.Int64("subtask-id", subtask.ID)), sm.DataFiles, diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 5453ca7bf5bed..984fe5d933a8b 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -88,7 +88,6 @@ const ( fieldsEnclosedByOption = "fields_enclosed_by" fieldsEscapedByOption = "fields_escaped_by" fieldsDefinedNullByOption = "fields_defined_null_by" - fieldsEncodedByOption = "fields_encoded_by" linesTerminatedByOption = "lines_terminated_by" skipRowsOption = "skip_rows" splitFileOption = "split_file" @@ -115,7 +114,6 @@ var ( fieldsEnclosedByOption: true, fieldsEscapedByOption: true, fieldsDefinedNullByOption: true, - fieldsEncodedByOption: true, linesTerminatedByOption: true, skipRowsOption: true, splitFileOption: false, @@ -221,8 +219,6 @@ type Plan struct { Charset *string ImportantSysVars map[string]string - FieldsEncodedBy config.FieldEncodeType - // used for LOAD DATA and CSV format of IMPORT INTO FieldNullDef []string // this is not used in IMPORT INTO @@ -519,19 +515,6 @@ func (e *LoadDataController) checkFieldParams() error { if e.Format != DataFormatCSV && e.Format != DataFormatParquet && e.Format != DataFormatSQL { return exeerrors.ErrLoadDataUnsupportedFormat.GenWithStackByArgs(e.Format) } - if e.FieldsEncodedBy == config.FieldEncodeBase64 { - if e.Format == DataFormatCSV { - if e.FieldsEnclosedBy != "" { - return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_enclosed_by must be empty when fields_encoded_by is 'base64'") - } - if e.FieldsEscapedBy != "" { - return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_escaped_by must be empty when fields_encoded_by is 'base64'") - } - if e.Charset != nil && *e.Charset != "binary" { - return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("character_set must be 'binary' when fields_encoded_by is 
'base64'") - } - } - } } else { if e.NullValueOptEnclosed && len(e.FieldsEnclosedBy) == 0 { return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("must specify FIELDS [OPTIONALLY] ENCLOSED BY when use NULL DEFINED BY OPTIONALLY ENCLOSED") @@ -671,17 +654,6 @@ func (p *Plan) initOptions(ctx context.Context, seCtx sessionctx.Context, option } p.FieldNullDef = []string{v} } - if opt, ok := specifiedOptions[fieldsEncodedByOption]; ok { - v, err := optAsString(opt) - if err != nil { - return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) - } - v = strings.ToLower(v) - if config.FieldEncodeType(v) != config.FieldEncodeBase64 { - return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) - } - p.FieldsEncodedBy = config.FieldEncodeType(v) - } if opt, ok := specifiedOptions[linesTerminatedByOption]; ok { v, err := optAsString(opt) // cannot set terminator to empty string explicitly @@ -1183,7 +1155,6 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { MemoryQuota: mydump.GetMemoryQuota(e.ThreadCnt), UseStreaming: true, UseSampleAllocator: false, - Base64: e.FieldsEncodedBy == config.FieldEncodeBase64, } } diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 7f91be4c40cb1..4e4f2408ab00d 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -814,17 +814,6 @@ func (s *StringOrStringSlice) UnmarshalTOML(in any) error { return nil } -// FieldEncodeType is the type of encoding for a CSV field. -type FieldEncodeType string - -const ( - // FieldEncodeNone means no special encoding. - FieldEncodeNone FieldEncodeType = "" - // FieldEncodeBase64 means the field is encoded in base64. - // this encoding also implies some constraints on other parameters - FieldEncodeBase64 FieldEncodeType = "base64" -) - // CSVConfig is the config for CSV files. type CSVConfig struct { // FieldsTerminatedBy, FieldsEnclosedBy and LinesTerminatedBy should all be in utf8mb4 encoding. 
diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 0760874b9445a..32c3e72785220 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -94,7 +94,6 @@ type ParquetFileMeta struct { MemoryQuota int // memory quota for current file reader to use non-streaming mode UseStreaming bool // whether use streaming mode UseSampleAllocator bool // whether use sample allocator - Base64 bool } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index c56582a921514..e27fd667a03c8 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -16,7 +16,6 @@ package mydump import ( "context" - "encoding/base64" "fmt" "io" "math" @@ -353,8 +352,6 @@ type ParquetParser struct { lastRow Row logger log.Logger - base64 bool - memoryUsage int memLimiter *membuf.Limiter } @@ -644,14 +641,6 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { continue } setFunc(storeOffset+i, col, val) - if pp.base64 { - var decoded []byte - decoded, err = base64.StdEncoding.DecodeString(pp.rows[storeOffset+i][col].GetString()) - if err != nil { - return 0, err - } - pp.rows[storeOffset+i][col].SetString(string(decoded), "utf8mb4_bin") - } } } @@ -999,7 +988,6 @@ func NewParquetParser( columnNames: columnNames, alloc: allocator, logger: log.FromContext(ctx), - base64: meta.Base64, memoryUsage: memoryUsage, memLimiter: readerMemoryLimiter, } @@ -1089,7 +1077,6 @@ func SampleStatisticsFromParquet( colMetas: columnMetas, columnNames: columnNames, logger: log.FromContext(ctx), - base64: fileMeta.ParquetMeta.Base64, } if err := parser.Init(); err != nil { return 0, 0, 0, errors.Trace(err) From 99d32e7af2cf397caa69ba827d001e925cf794c8 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sun, 2 Mar 2025 14:30:35 +0800 Subject: [PATCH 64/93] add prefetch size --- 
pkg/lightning/mydump/parquet_parser.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index e27fd667a03c8..eb411b7385046 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -307,7 +307,7 @@ func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) if len(name) == 0 { name = pf.path } - reader, err := pf.store.Open(pf.ctx, name, nil) + reader, err := pf.store.Open(pf.ctx, name, &storage.ReaderOption{PrefetchSize: 1 << 20}) if err != nil { return nil, errors.Trace(err) } @@ -802,7 +802,7 @@ func OpenParquetReader( store storage.ExternalStorage, path string, ) (storage.ReadSeekCloser, error) { - r, err := store.Open(ctx, path, nil) + r, err := store.Open(ctx, path, &storage.ReaderOption{PrefetchSize: 1 << 20}) if err != nil { return nil, err } @@ -1005,7 +1005,7 @@ func SampleStatisticsFromParquet( store storage.ExternalStorage, ) ( avgRowSize float64, - memoryUsage int, + memoryUsageStream int, memoryUsageFull int, err error, ) { @@ -1111,7 +1111,9 @@ func SampleStatisticsFromParquet( avgRowSize = float64(rowSize) / float64(rowCount) - memoryUsageStream, memoryUsageFull := 0, 0 + memoryUsageStream = len(columnMetas) << 20 + memoryUsageFull = len(columnMetas) << 20 + for _, alloc := range allSampleAllocators { memoryUsageFull += alloc.maxDataPage memoryUsageFull += alloc.totalDictPage From c75afdefd5b2dea8e24511221a51c545863aa3fb Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sun, 2 Mar 2025 17:03:47 +0800 Subject: [PATCH 65/93] fix build --- lightning/pkg/importer/import.go | 4 ++-- .../importinto/encode_and_sort_operator.go | 6 +++--- pkg/disttask/importinto/task_executor.go | 4 ++-- pkg/executor/importer/table_import.go | 1 + pkg/lightning/mydump/allocator.go | 14 +++++++++----- pkg/lightning/mydump/parquet_parser.go | 15 +++++++++++---- 6 files changed, 28 insertions(+), 16 
deletions(-) diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index 9b7c42030e162..1f29b93cf895a 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -546,7 +546,7 @@ func (rc *Controller) Close() { func (rc *Controller) Run(ctx context.Context) error { failpoint.Inject("beforeRun", func() {}) - mydump.SetMemoryLimitForParquet(rc.cfg.App.MaxMemoryUsage, true) + mydump.ConfigureReaderLimitForParquet(rc.cfg.App.MaxMemoryUsage) opts := []func(context.Context) error{ rc.setGlobalVariables, @@ -1545,7 +1545,7 @@ func (rc *Controller) importTables(ctx context.Context) (finalErr error) { // All tables are read, we can free memory used for parquet. logTask.Info("Read table done, free memory and call GC") - mydump.FreeMemoryForParquet() + mydump.ReleaseMemoryForParquet() postProgress = func() error { close(postProcessTaskChan) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 6b3d43510a89a..5c0d617a62558 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -233,9 +233,9 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) indexKVGroupCnt := getNumOfIndexGenKV(plan.DesiredTableInfo) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) - // For parquet file format, we allocate 50% of the memory to file reader. - if plan.Format == "parquet" { - memPerCon /= 2 + // For parquet file format, we allocate 40% of the memory to file reader. + if plan.Format == importer.DataFormatParquet { + memPerCon = memPerCon * 3 / 5 } // we use half of the total available memory for data writer, and the other half // for encoding and other stuffs, it's an experience value, might not optimal. 
diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index d7ed67e4325d7..c95b0eb97216d 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -100,7 +100,7 @@ func (s *importStepExecutor) Init(ctx context.Context) error { if s.taskMeta.Plan.Format == importer.DataFormatParquet { // For `IMPORT INTO format "parquet"`, we set the memory usage for parquet reader to 40%. - mydump.SetMemoryLimitForParquet(40) + mydump.ConfigureReaderLimitForParquet(40) } tableImporter, err := getTableImporter(ctx, s.taskID, s.taskMeta, s.store) @@ -280,7 +280,7 @@ func (s *importStepExecutor) onFinished(ctx context.Context, subtask *proto.Subt func (s *importStepExecutor) Cleanup(_ context.Context) (err error) { if s.taskMeta.Plan.Format == importer.DataFormatParquet { - mydump.FreeMemoryForParquet() + mydump.ReleaseMemoryForParquet() } s.logger.Info("cleanup subtask env") diff --git a/pkg/executor/importer/table_import.go b/pkg/executor/importer/table_import.go index a4cd376b170e9..8cafd72dd4694 100644 --- a/pkg/executor/importer/table_import.go +++ b/pkg/executor/importer/table_import.go @@ -98,6 +98,7 @@ type Chunk struct { Type mydump.SourceType Compression mydump.Compression Timestamp int64 + ParquetMeta mydump.ParquetFileMeta } // prepareSortDir creates a new directory for import, remove previous sort directory if exists. diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index f4222ee4615c6..333b71710d4ec 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -16,6 +16,7 @@ package mydump import ( "math" + "runtime" "runtime/debug" "sync" "unsafe" @@ -48,9 +49,9 @@ var ( GetArena func(*membuf.Buffer) arena ) -// SetMemoryLimitForParquet set the memory limit for parquet reader. -// Remember to call FreeMemoryForParquet to free the memory. 
-func SetMemoryLimitForParquet(percent int) { +// ConfigureReaderLimitForParquet set the memory limit for parquet reader. +// Remember to call ReleaseMemoryForParquet to free the memory. +func ConfigureReaderLimitForParquet(percent int) { lk.Lock() defer lk.Unlock() @@ -65,7 +66,7 @@ func SetMemoryLimitForParquet(percent int) { // Set limit to int max, which means no limiter memTotal = math.MaxInt32 } - readerMemoryLimit = int(memTotal) * min(percent, 90) / 100 + readerMemoryLimit = int(memTotal) * min(percent, 75) / 100 readerMemoryLimiter = membuf.NewLimiter(readerMemoryLimit) gcPercent := (10000/percent - 100) / 10 * 10 @@ -85,7 +86,8 @@ func SetMemoryLimitForParquet(percent int) { ) } -func FreeMemoryForParquet() { +// ReleaseMemoryForParquet releases memory allocated for parquet readers. +func ReleaseMemoryForParquet() { lk.Lock() defer lk.Unlock() @@ -93,6 +95,8 @@ func FreeMemoryForParquet() { if importCount == 0 { globalPool.Destroy() debug.SetGCPercent(100) + //nolint: all_revive,revive + runtime.GC() } } diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index eb411b7385046..8c6288bf32549 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -41,6 +41,10 @@ const ( // defaultBatchSize is the number of rows fetched each time in the parquet reader defaultBatchSize = 128 + // readerPrefetchSize is the prefetch size for each reader. + // 1M is sufficient for most small Parquet files. + readerPrefetchSize = 1 << 20 + // defaultBufSize specifies the default size of skip buffer. // Skip buffer is used when reading data from the cloud. 
If there is a gap between the current // read position and the last read position, these data is stored in this buffer to avoid @@ -307,7 +311,7 @@ func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) if len(name) == 0 { name = pf.path } - reader, err := pf.store.Open(pf.ctx, name, &storage.ReaderOption{PrefetchSize: 1 << 20}) + reader, err := pf.store.Open(pf.ctx, name, &storage.ReaderOption{PrefetchSize: readerPrefetchSize}) if err != nil { return nil, errors.Trace(err) } @@ -802,7 +806,7 @@ func OpenParquetReader( store storage.ExternalStorage, path string, ) (storage.ReadSeekCloser, error) { - r, err := store.Open(ctx, path, &storage.ReaderOption{PrefetchSize: 1 << 20}) + r, err := store.Open(ctx, path, &storage.ReaderOption{PrefetchSize: readerPrefetchSize}) if err != nil { return nil, err } @@ -1029,6 +1033,9 @@ func SampleStatisticsFromParquet( return 0, 0, 0, errors.Trace(err) } + //nolint: errcheck + defer reader.Close() + fileSchema := reader.MetaData().Schema columnMetas := make([]convertedType, fileSchema.NumColumns()) columnNames := make([]string, 0, fileSchema.NumColumns()) @@ -1111,8 +1118,8 @@ func SampleStatisticsFromParquet( avgRowSize = float64(rowSize) / float64(rowCount) - memoryUsageStream = len(columnMetas) << 20 - memoryUsageFull = len(columnMetas) << 20 + memoryUsageStream = len(columnMetas) * readerPrefetchSize + memoryUsageFull = readerPrefetchSize for _, alloc := range allSampleAllocators { memoryUsageFull += alloc.maxDataPage From db06ab0bc7a48046a1a7e79158c73992d4677217 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 26 Feb 2025 15:06:10 +0800 Subject: [PATCH 66/93] [test] add base64 for csv --- pkg/executor/importer/import.go | 29 ++++++++++++++++++++++++- pkg/lightning/config/config.go | 34 +++++++++++++++++++++++++++++- pkg/lightning/mydump/csv_parser.go | 13 ++++++++++++ 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go 
index 3dd4f2aac0672..cb56212c6abf0 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -88,6 +88,7 @@ const ( fieldsEnclosedByOption = "fields_enclosed_by" fieldsEscapedByOption = "fields_escaped_by" fieldsDefinedNullByOption = "fields_defined_null_by" + fieldsEncodedByOption = "fields_encoded_by" linesTerminatedByOption = "lines_terminated_by" skipRowsOption = "skip_rows" splitFileOption = "split_file" @@ -114,6 +115,7 @@ var ( fieldsEnclosedByOption: true, fieldsEscapedByOption: true, fieldsDefinedNullByOption: true, + fieldsEncodedByOption: true, linesTerminatedByOption: true, skipRowsOption: true, splitFileOption: false, @@ -136,6 +138,7 @@ var ( fieldsEnclosedByOption: {}, fieldsEscapedByOption: {}, fieldsDefinedNullByOption: {}, + fieldsEncodedByOption: {}, linesTerminatedByOption: {}, skipRowsOption: {}, splitFileOption: {}, @@ -220,7 +223,8 @@ type Plan struct { ImportantSysVars map[string]string // used for LOAD DATA and CSV format of IMPORT INTO - FieldNullDef []string + FieldNullDef []string + FieldsEncodedBy config.FieldEncodeType // this is not used in IMPORT INTO NullValueOptEnclosed bool // LinesStartingBy is not used in IMPORT INTO @@ -515,6 +519,17 @@ func (e *LoadDataController) checkFieldParams() error { if e.Format != DataFormatCSV && e.Format != DataFormatParquet && e.Format != DataFormatSQL { return exeerrors.ErrLoadDataUnsupportedFormat.GenWithStackByArgs(e.Format) } + if e.FieldsEncodedBy == config.FieldEncodeBase64 { + if e.FieldsEnclosedBy != "" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_enclosed_by must be empty when fields_encoded_by is 'base64'") + } + if e.FieldsEscapedBy != "" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("fields_escaped_by must be empty when fields_encoded_by is 'base64'") + } + if e.Charset != nil && *e.Charset != "binary" { + return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("character_set must be 'binary' when 
fields_encoded_by is 'base64'") + } + } } else { if e.NullValueOptEnclosed && len(e.FieldsEnclosedBy) == 0 { return exeerrors.ErrLoadDataWrongFormatConfig.GenWithStackByArgs("must specify FIELDS [OPTIONALLY] ENCLOSED BY when use NULL DEFINED BY OPTIONALLY ENCLOSED") @@ -654,6 +669,17 @@ func (p *Plan) initOptions(ctx context.Context, seCtx sessionctx.Context, option } p.FieldNullDef = []string{v} } + if opt, ok := specifiedOptions[fieldsEncodedByOption]; ok { + v, err := optAsString(opt) + if err != nil { + return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) + } + v = strings.ToLower(v) + if config.FieldEncodeType(v) != config.FieldEncodeBase64 { + return exeerrors.ErrInvalidOptionVal.FastGenByArgs(opt.Name) + } + p.FieldsEncodedBy = config.FieldEncodeType(v) + } if opt, ok := specifiedOptions[linesTerminatedByOption]; ok { v, err := optAsString(opt) // cannot set terminator to empty string explicitly @@ -964,6 +990,7 @@ func (e *LoadDataController) GenerateCSVConfig() *config.CSVConfig { // ignore optionally enclosed FieldsEnclosedBy: e.FieldsEnclosedBy, LinesTerminatedBy: e.LinesTerminatedBy, + FieldsEncodedBy: e.FieldsEncodedBy, NotNull: false, FieldNullDefinedBy: e.FieldNullDef, Header: false, diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 4e4f2408ab00d..8c665a588c537 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -814,6 +814,17 @@ func (s *StringOrStringSlice) UnmarshalTOML(in any) error { return nil } +// FieldEncodeType is the type of encoding for a CSV field. +type FieldEncodeType string + +const ( + // FieldEncodeNone means no special encoding. + FieldEncodeNone FieldEncodeType = "" + // FieldEncodeBase64 means the field is encoded in base64. + // this encoding also implies some constraints on other parameters + FieldEncodeBase64 FieldEncodeType = "base64" +) + // CSVConfig is the config for CSV files. 
type CSVConfig struct { // FieldsTerminatedBy, FieldsEnclosedBy and LinesTerminatedBy should all be in utf8mb4 encoding. @@ -828,7 +839,8 @@ type CSVConfig struct { // deprecated, use `escaped-by` instead. BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"` // FieldsEscapedBy has higher priority than BackslashEscape, currently it must be a single character if set. - FieldsEscapedBy string `toml:"escaped-by" json:"escaped-by"` + FieldsEscapedBy string `toml:"escaped-by" json:"escaped-by"` + FieldsEncodedBy FieldEncodeType `toml:"encoded-by" json:"encoded-by"` // hide these options for lightning configuration file, they can only be used by LOAD DATA // https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling @@ -884,6 +896,21 @@ func (csv *CSVConfig) adjust() error { return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV terminator and `mydumper.csv.escaped-by`", csv.FieldsEscapedBy) } } + + csv.FieldsEncodedBy = FieldEncodeType(strings.ToLower(string(csv.FieldsEncodedBy))) + if csv.FieldsEncodedBy == FieldEncodeBase64 { + if csv.Header { + return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.header` must be false when `encoded-by` is 'base64'") + } + if csv.FieldsEnclosedBy != "" { + return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.delimiter` must be empty when `encoded-by` is 'base64'") + } + if csv.FieldsEscapedBy != "" { + return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.escaped-by` must be empty when `encoded-by` is 'base64'") + } + } else if csv.FieldsEncodedBy != FieldEncodeNone { + return common.ErrInvalidConfig.GenWithStack("unsupported `encoded-by` value '%s'", csv.FieldsEncodedBy) + } return nil } @@ -951,6 +978,11 @@ func (m *MydumperRuntime) adjust() error { if len(m.DataCharacterSet) == 0 { m.DataCharacterSet = defaultCSVDataCharacterSet } + if m.CSV.FieldsEncodedBy == FieldEncodeBase64 { + if m.DataCharacterSet != "binary" { + return 
common.ErrInvalidConfig.GenWithStack("`mydumper.data-character-set` must be 'binary' when `mydumper.csv.encoded-by` is 'base64'") + } + } charset, err1 := ParseCharset(m.DataCharacterSet) if err1 != nil { return common.ErrInvalidConfig.Wrap(err1).GenWithStack("invalid `mydumper.data-character-set`") diff --git a/pkg/lightning/mydump/csv_parser.go b/pkg/lightning/mydump/csv_parser.go index 250fef14f2751..965760f8c5d44 100644 --- a/pkg/lightning/mydump/csv_parser.go +++ b/pkg/lightning/mydump/csv_parser.go @@ -17,6 +17,7 @@ package mydump import ( "bytes" "context" + "encoding/base64" "io" "regexp" "slices" @@ -55,6 +56,7 @@ type CSVParser struct { startingBy []byte escapedBy string unescapeRegexp *regexp.Regexp + base64Encoded bool charsetConvertor *CharsetConvertor // These variables are used with IndexAnyByte to search a byte slice for the @@ -168,6 +170,7 @@ func NewCSVParser( startingBy: []byte(cfg.LinesStartingBy), escapedBy: cfg.FieldsEscapedBy, unescapeRegexp: r, + base64Encoded: cfg.FieldsEncodedBy == config.FieldEncodeBase64, escFlavor: escFlavor, quoteByteSet: makeByteSet(quoteStopSet), unquoteByteSet: makeByteSet(unquoteStopSet), @@ -201,6 +204,16 @@ func encodeSpecialSymbols(cfg *config.CSVConfig, cc *CharsetConvertor) (separato } func (parser *CSVParser) unescapeString(input field) (unescaped string, isNull bool, err error) { + if parser.base64Encoded { + var decoded []byte + decoded, err = base64.StdEncoding.DecodeString(input.content) + if err != nil { + return + } + unescaped = string(decoded) + return + } + // Convert the input from another charset to utf8mb4 before we return the string. 
if unescaped, err = parser.charsetConvertor.Decode(input.content); err != nil { return From ff2b0ea8a6b6e9dd87a67a6877b886e59c15a038 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 3 Mar 2025 13:15:53 +0800 Subject: [PATCH 67/93] fix after merge --- pkg/executor/importer/table_import.go | 1 + pkg/lightning/mydump/simple_allocator.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/executor/importer/table_import.go b/pkg/executor/importer/table_import.go index 8cafd72dd4694..45d964a19f532 100644 --- a/pkg/executor/importer/table_import.go +++ b/pkg/executor/importer/table_import.go @@ -439,6 +439,7 @@ func (e *LoadDataController) PopulateChunks(ctx context.Context) (chunksMap map[ Type: region.FileMeta.Type, Compression: region.FileMeta.Compression, Timestamp: timestamp, + ParquetMeta: region.FileMeta.ParquetMeta, }) } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 9727918953bc7..dc12e79d03e12 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -46,8 +46,9 @@ func readInt(buf []byte) int { return int(buf[0])<<24 | int(buf[1])<<16 | int(buf[2])<<8 | int(buf[3]) } +// Because there may have memory fragment problems, we will over estimate the allocation size here. 
func simpleGetAllocationSize(size int) int { - return roundUp(size+metaSize, alignSize) * 2 + return roundUp(size+metaSize, alignSize) * 3 / 2 } /* From f1860771c9b84cc2f50d66f0370ed31849d03b33 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 3 Mar 2025 13:49:37 +0800 Subject: [PATCH 68/93] fix --- pkg/lightning/mydump/simple_allocator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index dc12e79d03e12..98ce3f00138df 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -48,7 +48,7 @@ func readInt(buf []byte) int { // Because there may have memory fragment problems, we will over estimate the allocation size here. func simpleGetAllocationSize(size int) int { - return roundUp(size+metaSize, alignSize) * 3 / 2 + return roundUp(size+metaSize, alignSize) * 2 } /* From 296f9d7dfbf561d3938938bf0a42b95cc5432255 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 3 Mar 2025 13:53:41 +0800 Subject: [PATCH 69/93] fix test --- pkg/lightning/mydump/allocator_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/lightning/mydump/allocator_test.go b/pkg/lightning/mydump/allocator_test.go index 1c5f14b4c154e..b69e6e1b41d21 100644 --- a/pkg/lightning/mydump/allocator_test.go +++ b/pkg/lightning/mydump/allocator_test.go @@ -24,9 +24,8 @@ import ( ) func TestSimpleAllocator(t *testing.T) { - totalSize := 16 << 20 - a := simpleAllocator{} - a.init(totalSize) + defaultArenaSize = 16 << 20 + a := getSimpleAllocator(nil) var ( lk sync.Mutex From af7007b1e6821bed5f62c9d4211a33b307696ebc Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 6 Mar 2025 16:41:20 +0800 Subject: [PATCH 70/93] Update code --- go.mod | 2 +- go.sum | 2 + lightning/pkg/importer/dup_detect.go | 1 - lightning/pkg/importer/table_import.go | 1 - .../importinto/encode_and_sort_operator.go | 1 + 
pkg/disttask/importinto/task_executor.go | 13 +- pkg/executor/importer/import.go | 15 +- pkg/lightning/mydump/allocator.go | 34 +++- pkg/lightning/mydump/loader.go | 12 +- pkg/lightning/mydump/parquet_parser.go | 175 ++++-------------- pkg/lightning/mydump/simple_allocator.go | 5 - 11 files changed, 82 insertions(+), 179 deletions(-) diff --git a/go.mod b/go.mod index 5987da1d53466..85df49198d5db 100644 --- a/go.mod +++ b/go.mod @@ -152,7 +152,7 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 +require github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 require ( filippo.io/edwards25519 v1.1.0 // indirect diff --git a/go.sum b/go.sum index 8f73d062d9cfe..124665d2e70bd 100644 --- a/go.sum +++ b/go.sum @@ -518,6 +518,8 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 h1:8QBwC5DOnNBqsXPpeGqD79FcYNTqVR6wDeczNpHLBpA= github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 h1:3Ec2rNvZT3b5HUlKi1aCCDu11sn7swFiqyjdpBrSe7c= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= diff --git a/lightning/pkg/importer/dup_detect.go b/lightning/pkg/importer/dup_detect.go index 
98dc2444104a6..8539c24653fea 100644 --- a/lightning/pkg/importer/dup_detect.go +++ b/lightning/pkg/importer/dup_detect.go @@ -204,7 +204,6 @@ func (d *dupDetector) addKeysByChunk( chunk *checkpoints.ChunkCheckpoint, ) error { chunk.FileMeta.ParquetMeta.UseStreaming = true - chunk.FileMeta.ParquetMeta.UseSampleAllocator = false parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core) if err != nil { return err diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index fa6c5f5b92272..9a62b83651c78 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -777,7 +777,6 @@ ChunkLoop: break } - chunk.FileMeta.ParquetMeta.UseSampleAllocator = false chunk.FileMeta.ParquetMeta.MemoryQuota = mydump.GetMemoryQuota(rc.cfg.App.RegionConcurrency) cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 5c0d617a62558..5e2e3cf2c1443 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -234,6 +234,7 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) // For parquet file format, we allocate 40% of the memory to file reader. + // TODO(joechenrh): remove these hardcoded numbers. 
if plan.Format == importer.DataFormatParquet { memPerCon = memPerCon * 3 / 5 } diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index c95b0eb97216d..772363d40be1a 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -98,17 +98,20 @@ func getTableImporter( func (s *importStepExecutor) Init(ctx context.Context) error { s.logger.Info("init subtask env") - if s.taskMeta.Plan.Format == importer.DataFormatParquet { - // For `IMPORT INTO format "parquet"`, we set the memory usage for parquet reader to 40%. - mydump.ConfigureReaderLimitForParquet(40) - } - tableImporter, err := getTableImporter(ctx, s.taskID, s.taskMeta, s.store) if err != nil { return err } s.tableImporter = tableImporter + if s.taskMeta.Plan.Format == importer.DataFormatParquet { + // For `IMPORT INTO format "parquet"`, we set the memory usage for parquet reader to 40%. + mydump.ConfigureReaderLimitForParquet(40) + if s.tableImporter.EncodeThreadCnt > 0 { + s.tableImporter.Plan.ThreadCnt = min(s.tableImporter.EncodeThreadCnt, s.tableImporter.Plan.ThreadCnt) + } + } + // we need this sub context since Cleanup which wait on this routine is called // before parent context is canceled in normal flow. 
s.importCtx, s.importCancel = context.WithCancel(ctx) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index cb56212c6abf0..f2abb9c92e2a9 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -235,6 +235,7 @@ type Plan struct { DiskQuota config.ByteSize Checksum config.PostOpLevel ThreadCnt int + EncodeThreadCnt int MaxNodeCnt int MaxWriteSpeed config.ByteSize SplitFile bool @@ -560,6 +561,7 @@ func (p *Plan) initDefaultOptions(targetNodeCPUCnt int) { p.Checksum = config.OpLevelRequired p.ThreadCnt = threadCnt + p.EncodeThreadCnt = threadCnt p.MaxWriteSpeed = unlimitedWriteSpeed p.SplitFile = false p.MaxRecordedErrors = 100 @@ -1178,16 +1180,15 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { for _, dataFile := range dataFiles { // To reduce the memory usage, we only use streaming mode to read file. dataFile.ParquetMeta = mydump.ParquetFileMeta{ - MemoryUsageStream: memoryUsage, - MemoryUsageFull: memoryUsageFull, - MemoryQuota: mydump.GetMemoryQuota(e.ThreadCnt), - UseStreaming: true, - UseSampleAllocator: false, + MemoryUsageStream: memoryUsage, + MemoryUsageFull: memoryUsageFull, + MemoryQuota: mydump.GetMemoryQuota(e.ThreadCnt), + UseStreaming: true, } } - // TODO(joechnerh): Maybe we can adjust thread count for parquet here, - // when we support global sort using thread < 8. + // Adjust thread count for parquet here, because we may not be able to open ThreadCnt parquet files concurrently. 
+ e.Plan.EncodeThreadCnt = mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) } e.dataFiles = dataFiles diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 333b71710d4ec..7c3929a955ecb 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -42,9 +42,6 @@ var ( globalPool *membuf.Pool importCount int - // AllocSize returns actual allocated size from arena - AllocSize func(int) int - // GetArena creates a new arena GetArena func(*membuf.Buffer) arena ) @@ -94,6 +91,7 @@ func ReleaseMemoryForParquet() { importCount-- if importCount == 0 { globalPool.Destroy() + globalPool = nil debug.SetGCPercent(100) //nolint: all_revive,revive runtime.GC() @@ -111,8 +109,18 @@ func GetMemoryQuota(concurrency int) int { return quotaPerReader } +// AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. +// TODO(joechenrh): remove hardcoded numbers. +func AdjustEncodeThreadCnt(memoryUsage, threadCnt int) int { + memTotal, err := tidbmemory.MemTotal() + if err != nil { + return threadCnt + } + + return max(min(int(memTotal)*2/5/memoryUsage, threadCnt), 1) +} + func init() { - AllocSize = simpleGetAllocationSize GetArena = getSimpleAllocator } @@ -139,15 +147,19 @@ type defaultAllocator struct { allocatedBuf map[uintptr]int } -func (alloc *defaultAllocator) Allocate(size int, _ memory.BufferType) []byte { +func (alloc *defaultAllocator) Allocate(size int) []byte { for i, a := range alloc.arenas { if buf := a.allocate(size); buf != nil { alloc.allocatedBuf[addressOf(buf)] = i return buf } } - mbuf := globalPool.NewBuffer() - alloc.mbufs = append(alloc.mbufs, mbuf) + + var mbuf *membuf.Buffer + if globalPool != nil { + mbuf = globalPool.NewBuffer() + alloc.mbufs = append(alloc.mbufs, mbuf) + } na := GetArena(mbuf) buf := na.allocate(size) @@ -164,9 +176,9 @@ func (alloc *defaultAllocator) Free(buf []byte) { } } -func (alloc *defaultAllocator) Reallocate(size int, buf []byte, tp 
memory.BufferType) []byte { +func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { alloc.Free(buf) - return alloc.Allocate(size, tp) + return alloc.Allocate(size) } func (alloc *defaultAllocator) Close() { @@ -179,6 +191,10 @@ func (alloc *defaultAllocator) Close() { alloc.arenas = nil } +func (alloc *defaultAllocator) Allocated() int { + return defaultArenaSize * len(alloc.arenas) +} + // GetAllocator get a default allocator func GetAllocator() memory.Allocator { return &defaultAllocator{ diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 32c3e72785220..98e276ae2acdc 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -88,12 +88,11 @@ type MDTableMeta struct { // ParquetFileMeta contains some analyzed metadata for a parquet file by MyDumper Loader. type ParquetFileMeta struct { - Rows int64 // row count - MemoryUsageStream int // memory usage for streaming mode - MemoryUsageFull int // memory usage for non-streaming mode - MemoryQuota int // memory quota for current file reader to use non-streaming mode - UseStreaming bool // whether use streaming mode - UseSampleAllocator bool // whether use sample allocator + Rows int64 // row count + MemoryUsageStream int // memory usage for streaming mode + MemoryUsageFull int // memory usage for non-streaming mode + MemoryQuota int // memory quota for current file reader to use non-streaming mode + UseStreaming bool // whether use streaming mode } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. 
@@ -582,7 +581,6 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size info.FileMeta.ParquetMeta.MemoryUsageStream = s.sampledParquetMemoryUsage[tableName] info.FileMeta.ParquetMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] info.FileMeta.ParquetMeta.UseStreaming = true - info.FileMeta.ParquetMeta.UseSampleAllocator = false } s.tableDatas = append(s.tableDatas, info) } diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 8c6288bf32549..bb1619734c7bd 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -41,10 +41,6 @@ const ( // defaultBatchSize is the number of rows fetched each time in the parquet reader defaultBatchSize = 128 - // readerPrefetchSize is the prefetch size for each reader. - // 1M is sufficient for most small Parquet files. - readerPrefetchSize = 1 << 20 - // defaultBufSize specifies the default size of skip buffer. // Skip buffer is used when reading data from the cloud. 
If there is a gap between the current // read position and the last read position, these data is stored in this buffer to avoid @@ -311,7 +307,7 @@ func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) if len(name) == 0 { name = pf.path } - reader, err := pf.store.Open(pf.ctx, name, &storage.ReaderOption{PrefetchSize: readerPrefetchSize}) + reader, err := pf.store.Open(pf.ctx, name, nil) if err != nil { return nil, errors.Trace(err) } @@ -703,6 +699,7 @@ func (pp *ParquetParser) Close() error { openedParser.Add(-1) }() + log.FromContext(context.Background()).Info("[parquet parser test] Close parquet parser") pp.resetReader() for _, r := range pp.readers { if err := r.Close(); err != nil { @@ -806,7 +803,7 @@ func OpenParquetReader( store storage.ExternalStorage, path string, ) (storage.ReadSeekCloser, error) { - r, err := store.Open(ctx, path, &storage.ReaderOption{PrefetchSize: readerPrefetchSize}) + r, err := store.Open(ctx, path, nil) if err != nil { return nil, err } @@ -840,51 +837,13 @@ func ReadParquetFileRowCountByFile( return reader.MetaData().NumRows, nil } -// sampleAllocator is used to collection memory usage in parquet reader. -type sampleAllocator struct { - maxCompressedLength int - maxDataPage int - totalDictPage int - otherAllocated int -} - -func (sa *sampleAllocator) Allocate(size int, tp memory.BufferType) []byte { - allocSize := AllocSize(size) - switch tp { - case memory.BufferCompressed: - sa.maxCompressedLength = max(sa.maxCompressedLength, allocSize) - case memory.BufferDataPage: - sa.maxDataPage = max(sa.maxDataPage, allocSize) - case memory.BufferDictionary: - // For each row group, we need to store all dictionary pages to decode data page. 
- sa.totalDictPage += allocSize - default: - sa.otherAllocated += allocSize - } - return make([]byte, size) -} - -func (*sampleAllocator) Free([]byte) {} - -func (sa *sampleAllocator) Reallocate(size int, _ []byte, tp memory.BufferType) []byte { - return sa.Allocate(size, tp) -} - -func (sa *sampleAllocator) reset() { - sa.maxCompressedLength = 0 - sa.maxDataPage = 0 - sa.totalDictPage = 0 - sa.otherAllocated = 0 -} - // GetDefaultParquetMeta return a default file meta func GetDefaultParquetMeta() ParquetFileMeta { return ParquetFileMeta{ - MemoryUsageStream: 0, - MemoryUsageFull: math.MaxInt32, - MemoryQuota: 0, - UseSampleAllocator: true, - UseStreaming: true, + MemoryUsageStream: 0, + MemoryUsageFull: math.MaxInt32, + MemoryQuota: 0, + UseStreaming: true, } } @@ -898,10 +857,7 @@ func NewParquetParser( ) (*ParquetParser, error) { // Acquire memory limiter first var memoryUsage int - if meta.UseSampleAllocator { - memoryUsage = 0 - meta.UseStreaming = true - } else if meta.MemoryUsageFull <= meta.MemoryQuota || meta.MemoryUsageFull == meta.MemoryUsageStream { + if meta.MemoryUsageFull <= meta.MemoryQuota { memoryUsage = meta.MemoryUsageFull meta.UseStreaming = false } else { @@ -909,7 +865,9 @@ func NewParquetParser( meta.UseStreaming = true } memoryUsage = min(memoryUsage, readerMemoryLimit) - readerMemoryLimiter.Acquire(memoryUsage) + if readerMemoryLimiter != nil { + readerMemoryLimiter.Acquire(memoryUsage) + } log.FromContext(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), @@ -918,7 +876,6 @@ func NewParquetParser( zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), zap.Int32("opened parser", openedParser.Add(1)), zap.Bool("streaming mode", meta.UseStreaming), - zap.Bool("use sample allocator", meta.UseSampleAllocator), ) wrapper, ok := r.(*parquetFileWrapper) @@ -932,14 +889,7 @@ func NewParquetParser( wrapper.Init(defaultBufSize) } - var 
allocator memory.Allocator - if meta.UseSampleAllocator { - allocator = &sampleAllocator{} - } else { - alloc := GetAllocator() - allocator = alloc - } - + allocator := GetAllocator() prop := parquet.NewReaderProperties(allocator) prop.BufferedStreamEnabled = meta.UseStreaming @@ -1018,75 +968,13 @@ func SampleStatisticsFromParquet( return 0, 0, 0, err } - wrapper := &parquetFileWrapper{ - ReadSeekCloser: r, - store: store, - ctx: ctx, - path: fileMeta.Path, - } - wrapper.Init(defaultBufSize) - - prop := parquet.NewReaderProperties(nil) - prop.BufferedStreamEnabled = true - reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) + parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, ParquetFileMeta{ + MemoryUsageStream: 0, + MemoryUsageFull: math.MaxInt, + MemoryQuota: 0, + }) if err != nil { - return 0, 0, 0, errors.Trace(err) - } - - //nolint: errcheck - defer reader.Close() - - fileSchema := reader.MetaData().Schema - columnMetas := make([]convertedType, fileSchema.NumColumns()) - columnNames := make([]string, 0, fileSchema.NumColumns()) - - for i := range columnMetas { - desc := reader.MetaData().Schema.Column(i) - columnNames = append(columnNames, strings.ToLower(desc.Name())) - - logicalType := desc.LogicalType() - if logicalType.IsValid() { - columnMetas[i].converted, columnMetas[i].decimalMeta = logicalType.ToConvertedType() - } else { - columnMetas[i].converted = desc.ConvertedType() - pnode, _ := desc.SchemaNode().(*schema.PrimitiveNode) - columnMetas[i].decimalMeta = pnode.DecimalMetadata() - } - } - - subreaders := make([]*file.Reader, 0, fileSchema.NumColumns()) - allSampleAllocators := make([]*sampleAllocator, 0, fileSchema.NumColumns()) - for i := 0; i < fileSchema.NumColumns(); i++ { - newWrapper, err := wrapper.Open("") - if err != nil { - return 0, 0, 0, errors.Trace(err) - } - - alloc := &sampleAllocator{} - prop := parquet.NewReaderProperties(alloc) - prop.BufferedStreamEnabled = true - allSampleAllocators = 
append(allSampleAllocators, alloc) - - reader, err := file.NewParquetReader( - newWrapper, - file.WithReadProps(prop), - file.WithMetadata(reader.MetaData()), - ) - - if err != nil { - return 0, 0, 0, errors.Trace(err) - } - subreaders = append(subreaders, reader) - } - - parser := &ParquetParser{ - readers: subreaders, - colMetas: columnMetas, - columnNames: columnNames, - logger: log.FromContext(ctx), - } - if err := parser.Init(); err != nil { - return 0, 0, 0, errors.Trace(err) + return 0, 0, 0, err } //nolint: errcheck @@ -1097,6 +985,7 @@ func SampleStatisticsFromParquet( rowCount int64 ) + reader := parser.readers[0] if reader.NumRowGroups() == 0 || reader.MetaData().RowGroups[0].NumRows == 0 { return 0, 0, 0, nil } @@ -1118,28 +1007,28 @@ func SampleStatisticsFromParquet( avgRowSize = float64(rowSize) / float64(rowCount) - memoryUsageStream = len(columnMetas) * readerPrefetchSize - memoryUsageFull = readerPrefetchSize + alloc := parser.alloc + defaultAlloc, _ := alloc.(*defaultAllocator) - for _, alloc := range allSampleAllocators { - memoryUsageFull += alloc.maxDataPage - memoryUsageFull += alloc.totalDictPage - memoryUsageStream += alloc.otherAllocated - memoryUsageStream += alloc.maxDataPage - memoryUsageStream += alloc.totalDictPage - } + // Here we add a defaultArenaSize to avoid differences in data between different files, as we only sample one file. + memoryUsageStream = defaultAlloc.Allocated() + defaultArenaSize + memoryUsageFull = defaultAlloc.Allocated() pageBufferFull := 0 for _, rg := range parser.readers[0].MetaData().RowGroups { totalUsage := 0 for _, c := range rg.Columns { - totalUsage += AllocSize(int(c.MetaData.GetTotalCompressedSize())) + bufSize := int(c.MetaData.GetTotalCompressedSize()) + // If single buffer size larger than arena size, non-stream mode will be disabled. 
+ if bufSize > defaultArenaSize { + totalUsage = 32 << 30 + break + } + totalUsage += roundUp(bufSize, alignSize) } pageBufferFull = max(pageBufferFull, totalUsage) } memoryUsageFull += pageBufferFull - memoryUsageStream = roundUp(memoryUsageStream, defaultArenaSize) - memoryUsageFull = roundUp(memoryUsageFull, defaultArenaSize) - return avgRowSize, memoryUsageStream, memoryUsageFull, nil + return avgRowSize, memoryUsageStream, roundUp(memoryUsageFull, defaultArenaSize), nil } diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/simple_allocator.go index 98ce3f00138df..62586655e4a85 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ b/pkg/lightning/mydump/simple_allocator.go @@ -46,11 +46,6 @@ func readInt(buf []byte) int { return int(buf[0])<<24 | int(buf[1])<<16 | int(buf[2])<<8 | int(buf[3]) } -// Because there may have memory fragment problems, we will over estimate the allocation size here. -func simpleGetAllocationSize(size int) int { - return roundUp(size+metaSize, alignSize) * 2 -} - /* simpleAllocator is a very simple allocator with low allocation efficiency which manages allocated memory using a linked list structure. 
From df7ee45c11fa6999036bbf4afe84a06887751e37 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 6 Mar 2025 20:30:09 +0800 Subject: [PATCH 71/93] Update code --- pkg/disttask/importinto/encode_and_sort_operator.go | 7 ++++--- pkg/disttask/importinto/task_executor.go | 5 ++--- pkg/executor/importer/import.go | 4 ++-- pkg/lightning/mydump/allocator.go | 6 ++++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 5e2e3cf2c1443..0eae0e4bcd961 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/pkg/executor/importer" "github.com/pingcap/tidb/pkg/lightning/backend/external" "github.com/pingcap/tidb/pkg/lightning/membuf" + "github.com/pingcap/tidb/pkg/lightning/mydump" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/resourcemanager/pool/workerpool" "github.com/pingcap/tidb/pkg/resourcemanager/util" @@ -233,11 +234,11 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) indexKVGroupCnt := getNumOfIndexGenKV(plan.DesiredTableInfo) memPerCon := resource.Mem.Capacity() / int64(plan.ThreadCnt) - // For parquet file format, we allocate 40% of the memory to file reader. - // TODO(joechenrh): remove these hardcoded numbers. + // For parquet format, we allocate 40% of the memory to file reader. if plan.Format == importer.DataFormatParquet { - memPerCon = memPerCon * 3 / 5 + memPerCon = memPerCon * (100 - mydump.ImportIntoReaderUsage) / 100 } + // we use half of the total available memory for data writer, and the other half // for encoding and other stuffs, it's an experience value, might not optimal. 
// Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index 772363d40be1a..7839adbf322fc 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -105,10 +105,9 @@ func (s *importStepExecutor) Init(ctx context.Context) error { s.tableImporter = tableImporter if s.taskMeta.Plan.Format == importer.DataFormatParquet { - // For `IMPORT INTO format "parquet"`, we set the memory usage for parquet reader to 40%. - mydump.ConfigureReaderLimitForParquet(40) + mydump.ConfigureReaderLimitForParquet(mydump.ImportIntoReaderUsage) if s.tableImporter.EncodeThreadCnt > 0 { - s.tableImporter.Plan.ThreadCnt = min(s.tableImporter.EncodeThreadCnt, s.tableImporter.Plan.ThreadCnt) + s.tableImporter.Plan.ThreadCnt = s.tableImporter.EncodeThreadCnt } } diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index f2abb9c92e2a9..68fcf7a8947f6 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -561,7 +561,6 @@ func (p *Plan) initDefaultOptions(targetNodeCPUCnt int) { p.Checksum = config.OpLevelRequired p.ThreadCnt = threadCnt - p.EncodeThreadCnt = threadCnt p.MaxWriteSpeed = unlimitedWriteSpeed p.SplitFile = false p.MaxRecordedErrors = 100 @@ -1187,7 +1186,8 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } } - // Adjust thread count for parquet here, because we may not be able to open ThreadCnt parquet files concurrently. + // Because we may not be able to open ThreadCnt files concurrently, + // we can adjust thread count for parquet here. 
e.Plan.EncodeThreadCnt = mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) } diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 7c3929a955ecb..76476bc67d361 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -28,6 +28,8 @@ import ( "go.uber.org/zap" ) +const ImportIntoReaderUsage = 40 + var ( // size of each arena defaultArenaSize = 256 << 20 @@ -110,14 +112,14 @@ func GetMemoryQuota(concurrency int) int { } // AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. -// TODO(joechenrh): remove hardcoded numbers. func AdjustEncodeThreadCnt(memoryUsage, threadCnt int) int { memTotal, err := tidbmemory.MemTotal() if err != nil { return threadCnt } - return max(min(int(memTotal)*2/5/memoryUsage, threadCnt), 1) + adjustedThreadCnt := int(memTotal) * ImportIntoReaderUsage / 100 / memoryUsage + return max(min(adjustedThreadCnt, threadCnt), 1) } func init() { From 1e8f83704e909264e4625999b94a4afcc82fc88e Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 7 Mar 2025 09:47:08 +0800 Subject: [PATCH 72/93] Fix merge sort --- .../importinto/encode_and_sort_operator.go | 8 +- pkg/executor/importer/import.go | 24 +++- pkg/lightning/mydump/allocator.go | 7 ++ pkg/lightning/mydump/loader.go | 12 +- pkg/lightning/mydump/parquet_parser.go | 118 ++++++++++++++---- 5 files changed, 131 insertions(+), 38 deletions(-) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 0eae0e4bcd961..e52e05d1337c7 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -232,7 +232,13 @@ func subtaskPrefix(taskID, subtaskID int64) string { func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) ( dataKVMemSizePerCon, perIndexKVMemSizePerCon uint64) { indexKVGroupCnt := getNumOfIndexGenKV(plan.DesiredTableInfo) - memPerCon := 
resource.Mem.Capacity() / int64(plan.ThreadCnt) + + threadCnt := plan.ThreadCnt + if plan.EncodeThreadCnt > 0 { + threadCnt = plan.EncodeThreadCnt + } + + memPerCon := resource.Mem.Capacity() / int64(threadCnt) // For parquet format, we allocate 40% of the memory to file reader. if plan.Format == importer.DataFormatParquet { diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 68fcf7a8947f6..ed39ccc9d5dbd 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1172,23 +1172,35 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { - _, memoryUsage, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) + _, memoryUsageStream, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) + streamThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsageStream, e.Plan.ThreadCnt) + nonstreamThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsageFull, e.Plan.ThreadCnt) + + encodeThreadCnt := streamThreadCnt + memoryUsage := memoryUsageStream + useStream := true + + // TODO(joechenrh): use a more proper way to choose mode. + if nonstreamThreadCnt > 1 && nonstreamThreadCnt >= streamThreadCnt/2 { + encodeThreadCnt = nonstreamThreadCnt + memoryUsage = memoryUsageFull + useStream = false + } + if err != nil { return errors.Trace(err) } for _, dataFile := range dataFiles { // To reduce the memory usage, we only use streaming mode to read file. dataFile.ParquetMeta = mydump.ParquetFileMeta{ - MemoryUsageStream: memoryUsage, - MemoryUsageFull: memoryUsageFull, - MemoryQuota: mydump.GetMemoryQuota(e.ThreadCnt), - UseStreaming: true, + MemoryUsage: memoryUsage, + UseStreaming: useStream, } } // Because we may not be able to open ThreadCnt files concurrently, // we can adjust thread count for parquet here. 
- e.Plan.EncodeThreadCnt = mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) + e.Plan.EncodeThreadCnt = encodeThreadCnt } e.dataFiles = dataFiles diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 76476bc67d361..075a7eb0c8583 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -112,6 +112,8 @@ func GetMemoryQuota(concurrency int) int { } // AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. +// It's used for IMPORT INTO. +// TODO(joechenrh): let lightning make use of it. func AdjustEncodeThreadCnt(memoryUsage, threadCnt int) int { memTotal, err := tidbmemory.MemTotal() if err != nil { @@ -143,6 +145,7 @@ type arena interface { } type defaultAllocator struct { + mu sync.Mutex arenas []arena mbufs []*membuf.Buffer @@ -150,6 +153,8 @@ type defaultAllocator struct { } func (alloc *defaultAllocator) Allocate(size int) []byte { + alloc.mu.Lock() + defer alloc.mu.Unlock() for i, a := range alloc.arenas { if buf := a.allocate(size); buf != nil { alloc.allocatedBuf[addressOf(buf)] = i @@ -171,6 +176,8 @@ func (alloc *defaultAllocator) Allocate(size int) []byte { } func (alloc *defaultAllocator) Free(buf []byte) { + alloc.mu.Lock() + defer alloc.mu.Unlock() addr := addressOf(buf) if arenaID, ok := alloc.allocatedBuf[addr]; ok { alloc.arenas[arenaID].free(buf) diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 98e276ae2acdc..3ce7622c1db7d 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -88,11 +88,9 @@ type MDTableMeta struct { // ParquetFileMeta contains some analyzed metadata for a parquet file by MyDumper Loader. 
type ParquetFileMeta struct { - Rows int64 // row count - MemoryUsageStream int // memory usage for streaming mode - MemoryUsageFull int // memory usage for non-streaming mode - MemoryQuota int // memory quota for current file reader to use non-streaming mode - UseStreaming bool // whether use streaming mode + Rows int64 // row count + MemoryUsage int // memory usage for reader + UseStreaming bool // whether use streaming mode } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. @@ -578,8 +576,8 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, path string, size if m, ok := metric.FromContext(ctx); ok { m.RowsCounter.WithLabelValues(metric.StateTotalRestore, tableName).Add(float64(totalRowCount)) } - info.FileMeta.ParquetMeta.MemoryUsageStream = s.sampledParquetMemoryUsage[tableName] - info.FileMeta.ParquetMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] + info.FileMeta.ParquetMeta.MemoryUsage = s.sampledParquetMemoryUsage[tableName] + // info.FileMeta.ParquetMeta.MemoryUsageFull = s.sampledParquetMemoryUsageFull[tableName] info.FileMeta.ParquetMeta.UseStreaming = true } s.tableDatas = append(s.tableDatas, info) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index bb1619734c7bd..11e3af2acfb1c 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -18,7 +18,6 @@ import ( "context" "fmt" "io" - "math" "math/big" "reflect" "strings" @@ -34,7 +33,9 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/zeropool" "go.uber.org/zap" + "golang.org/x/sync/errgroup" ) const ( @@ -334,7 +335,8 @@ type ParquetParser struct { dumpers []*columnDumper // rows stores the actual data after parsing. 
- rows [][]types.Datum + rows [][]types.Datum + rowPool *zeropool.Pool[[]types.Datum] // curIdx and avail is the current index and total number of rows in rows buffer curIdx int @@ -348,6 +350,7 @@ type ParquetParser struct { curRows int // number of rows read in total totalRows int // total rows in this file totalBytesRead int // total bytes read, estimated by all the read datum. + firstAfterReset bool lastRow Row logger log.Logger @@ -531,6 +534,10 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { return 0, nil } + for i := range readNum { + pp.rows[i] = pp.rowPool.Get() + } + read := 0 for read < readNum { // Move to next row group @@ -539,6 +546,7 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { pp.resetReader() } pp.curRowGroup++ + pp.firstAfterReset = true for c := 0; c < len(pp.dumpers); c++ { rowGroupReader := pp.readers[c].RowGroup(pp.curRowGroup) colReader, err := rowGroupReader.Column(c) @@ -578,6 +586,24 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { total int ) + // After moving to the next row group, we have to read several pages, + // so we do this concurrently. + if pp.firstAfterReset { + pp.firstAfterReset = false + var eg errgroup.Group + eg.SetLimit(2) + for i := range len(pp.dumpers) { + dumper := pp.dumpers[i] + eg.Go(func() error { + dumper.readNextBatch() + return nil + }) + } + if err := eg.Wait(); err != nil { + return 0, err + } + } + // Read data into buffers first for col, dumper := range pp.dumpers { meta := pp.colMetas[col] @@ -630,7 +656,7 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { } } - for i := 0; i < num; i++ { + for i := range num { val, ok := dumper.Next() if !ok { break @@ -771,7 +797,8 @@ func (pp *ParquetParser) LastRow() Row { } // RecycleRow implements the Parser interface. 
-func (*ParquetParser) RecycleRow(_ Row) { +func (pp *ParquetParser) RecycleRow(row Row) { + pp.rowPool.Put(row.Row) } // Columns returns the _lower-case_ column names corresponding to values in @@ -840,10 +867,8 @@ func ReadParquetFileRowCountByFile( // GetDefaultParquetMeta return a default file meta func GetDefaultParquetMeta() ParquetFileMeta { return ParquetFileMeta{ - MemoryUsageStream: 0, - MemoryUsageFull: math.MaxInt32, - MemoryQuota: 0, - UseStreaming: true, + MemoryUsage: 0, + UseStreaming: true, } } @@ -857,22 +882,13 @@ func NewParquetParser( ) (*ParquetParser, error) { // Acquire memory limiter first var memoryUsage int - if meta.MemoryUsageFull <= meta.MemoryQuota { - memoryUsage = meta.MemoryUsageFull - meta.UseStreaming = false - } else { - memoryUsage = meta.MemoryUsageStream - meta.UseStreaming = true - } - memoryUsage = min(memoryUsage, readerMemoryLimit) + memoryUsage = min(meta.MemoryUsage, readerMemoryLimit) if readerMemoryLimiter != nil { readerMemoryLimiter.Acquire(memoryUsage) } log.FromContext(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), - zap.String("memory usage full", fmt.Sprintf("%d MB", meta.MemoryUsageFull>>20)), - zap.String("memory quota", fmt.Sprintf("%d MB", meta.MemoryQuota>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), zap.Int32("opened parser", openedParser.Add(1)), zap.Bool("streaming mode", meta.UseStreaming), @@ -936,6 +952,11 @@ func NewParquetParser( subreaders = append(subreaders, reader) } + numColumns := len(columnMetas) + pool := zeropool.New(func() []types.Datum { + return make([]types.Datum, numColumns) + }) + parser := &ParquetParser{ readers: subreaders, colMetas: columnMetas, @@ -944,6 +965,7 @@ func NewParquetParser( logger: log.FromContext(ctx), memoryUsage: memoryUsage, memLimiter: readerMemoryLimiter, + rowPool: &pool, } if err := parser.Init(); err != nil { return nil, 
errors.Trace(err) @@ -952,6 +974,45 @@ func NewParquetParser( return parser, nil } +func estimateNonStreamMemory( + ctx context.Context, + fileMeta SourceFileMeta, + store storage.ExternalStorage, +) (int, error) { + r, err := store.Open(ctx, fileMeta.Path, nil) + if err != nil { + return 0, err + } + + parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, ParquetFileMeta{ + MemoryUsage: 0, + UseStreaming: false, + }) + if err != nil { + return 0, err + } + + //nolint: errcheck + defer parser.Close() + + reader := parser.readers[0] + totalReadRows := reader.MetaData().RowGroups[0].NumRows + for i := 0; i < int(totalReadRows); i++ { + err = parser.ReadRow() + if err != nil { + if errors.Cause(err) == io.EOF { + break + } + return 0, err + } + lastRow := parser.LastRow() + parser.RecycleRow(lastRow) + } + + defaultAlloc, _ := parser.alloc.(*defaultAllocator) + return defaultAlloc.Allocated() + defaultArenaSize, nil +} + // SampleStatisticsFromParquet samples row size and memory usage of the parquet file. func SampleStatisticsFromParquet( ctx context.Context, @@ -969,9 +1030,8 @@ func SampleStatisticsFromParquet( } parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, ParquetFileMeta{ - MemoryUsageStream: 0, - MemoryUsageFull: math.MaxInt, - MemoryQuota: 0, + MemoryUsage: 0, + UseStreaming: true, }) if err != nil { return 0, 0, 0, err @@ -1012,9 +1072,9 @@ func SampleStatisticsFromParquet( // Here we add a defaultArenaSize to avoid differences in data between different files, as we only sample one file. 
memoryUsageStream = defaultAlloc.Allocated() + defaultArenaSize - memoryUsageFull = defaultAlloc.Allocated() pageBufferFull := 0 + memoryUsageFull = defaultAlloc.Allocated() for _, rg := range parser.readers[0].MetaData().RowGroups { totalUsage := 0 for _, c := range rg.Columns { @@ -1028,7 +1088,17 @@ func SampleStatisticsFromParquet( } pageBufferFull = max(pageBufferFull, totalUsage) } - memoryUsageFull += pageBufferFull - return avgRowSize, memoryUsageStream, roundUp(memoryUsageFull, defaultArenaSize), nil + // Do some precheck, to prevent OOM during estimate memory usage. + memoryUsageFull = roundUp(memoryUsageFull+pageBufferFull, defaultArenaSize) + if memoryUsageFull < (6 << 30) { + memoryUsageFull, err = estimateNonStreamMemory(ctx, fileMeta, store) + } + + log.FromContext(ctx).Info("Get memory usage of parquet reader", + zap.String("memory usage full", fmt.Sprintf("%d MB", memoryUsageFull>>20)), + zap.String("memory usage stream", fmt.Sprintf("%d MB", memoryUsageStream>>20)), + ) + + return avgRowSize, memoryUsageStream, memoryUsageFull, err } From 6fb09b8cdb780dfad8a2c701dc765311ca8cbadf Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 7 Mar 2025 09:54:03 +0800 Subject: [PATCH 73/93] [test] skip preprocess for test --- pkg/lightning/mydump/region.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/lightning/mydump/region.go b/pkg/lightning/mydump/region.go index 27ac1230ab105..5dec4e81c90d7 100644 --- a/pkg/lightning/mydump/region.go +++ b/pkg/lightning/mydump/region.go @@ -362,18 +362,20 @@ func MakeSourceFileRegion( // because parquet files can't seek efficiently, there is no benefit in split. 
// parquet file are column orient, so the offset is read line number func makeParquetFileRegion( - ctx context.Context, + _ context.Context, cfg *DataDivideConfig, dataFile FileInfo, ) ([]*TableRegion, []float64, error) { numberRows := dataFile.FileMeta.ParquetMeta.Rows - var err error + // var err error // for safety if numberRows <= 0 { - numberRows, err = ReadParquetFileRowCountByFile(ctx, cfg.Store, dataFile.FileMeta) - if err != nil { - return nil, nil, err - } + // TODO(joechenrh): only for test + numberRows = 1600000 + // numberRows, err = ReadParquetFileRowCountByFile(ctx, cfg.Store, dataFile.FileMeta) + // if err != nil { + // return nil, nil, err + // } } region := &TableRegion{ DB: cfg.TableMeta.DB, From 4be7553f26d23af038503b697c36ea4e6bc0868b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 7 Mar 2025 12:04:50 +0800 Subject: [PATCH 74/93] Adjust encode concurrency based on memory usage --- pkg/executor/importer/import.go | 14 +--------- pkg/lightning/mydump/allocator.go | 16 ++++++++--- pkg/lightning/mydump/parquet_parser.go | 37 +++++++++++++++----------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index ed39ccc9d5dbd..73e0160d96485 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1173,19 +1173,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { _, memoryUsageStream, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) - streamThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsageStream, e.Plan.ThreadCnt) - nonstreamThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsageFull, e.Plan.ThreadCnt) - - encodeThreadCnt := streamThreadCnt - memoryUsage := memoryUsageStream - useStream := true - - // TODO(joechenrh): use a more proper way to choose mode. 
- if nonstreamThreadCnt > 1 && nonstreamThreadCnt >= streamThreadCnt/2 { - encodeThreadCnt = nonstreamThreadCnt - memoryUsage = memoryUsageFull - useStream = false - } + memoryUsage, encodeThreadCnt, useStream := mydump.AdjustEncodeThreadCnt(memoryUsageStream, memoryUsageFull, e.Plan.ThreadCnt) if err != nil { return errors.Trace(err) diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 075a7eb0c8583..7c3fb7c2a27a0 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -114,14 +114,22 @@ func GetMemoryQuota(concurrency int) int { // AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. // It's used for IMPORT INTO. // TODO(joechenrh): let lightning make use of it. -func AdjustEncodeThreadCnt(memoryUsage, threadCnt int) int { +func AdjustEncodeThreadCnt(memoryUsageStream, memoryUsageFull, threadCnt int, +) (memoryUsage, adjustCnt int, useStream bool) { memTotal, err := tidbmemory.MemTotal() if err != nil { - return threadCnt + return memoryUsage, threadCnt, true } - adjustedThreadCnt := int(memTotal) * ImportIntoReaderUsage / 100 / memoryUsage - return max(min(adjustedThreadCnt, threadCnt), 1) + streamThreadCnt := max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsageStream, threadCnt), 1) + fullThreadCnt := max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsageFull, threadCnt), 1) + + // TODO(joechenrh): use a more proper way to choose mode. + if streamThreadCnt == fullThreadCnt { + return memoryUsageFull, fullThreadCnt, false + } + + return memoryUsageStream, streamThreadCnt, true } func init() { diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 11e3af2acfb1c..1a2675bffef71 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -335,6 +335,7 @@ type ParquetParser struct { dumpers []*columnDumper // rows stores the actual data after parsing. 
+ // rows will be fetched from rowPool and reclaimed after recycle. rows [][]types.Datum rowPool *zeropool.Pool[[]types.Datum] @@ -351,6 +352,7 @@ type ParquetParser struct { totalRows int // total rows in this file totalBytesRead int // total bytes read, estimated by all the read datum. firstAfterReset bool + parallelRead bool lastRow Row logger log.Logger @@ -507,7 +509,7 @@ func (pp *ParquetParser) Init() error { } pp.dumpers = make([]*columnDumper, numCols) - for i := 0; i < numCols; i++ { + for i := range numCols { pp.dumpers[i] = createDumper(meta.Schema.Column(i).PhysicalType()) } @@ -547,7 +549,7 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { } pp.curRowGroup++ pp.firstAfterReset = true - for c := 0; c < len(pp.dumpers); c++ { + for c := range len(pp.dumpers) { rowGroupReader := pp.readers[c].RowGroup(pp.curRowGroup) colReader, err := rowGroupReader.Column(c) if err != nil { @@ -568,7 +570,7 @@ func (pp *ParquetParser) ReadRows(num int) (int, error) { pp.curRowInGroup += curRead } - for i := 0; i < readNum; i++ { + for i := range readNum { pp.totalBytesRead += estimateRowSize(pp.rows[i]) } @@ -586,12 +588,14 @@ func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { total int ) - // After moving to the next row group, we have to read several pages, - // so we do this concurrently. - if pp.firstAfterReset { + // After moving to the next row group, we need to read one dict page and + // at least one data page for each column. + // Since it's an I/O intensive operation, so we perform it in parallel. + // TODO(joechen): 4 is a experimental value and can be changed later. 
+ if pp.firstAfterReset && pp.parallelRead { pp.firstAfterReset = false var eg errgroup.Group - eg.SetLimit(2) + eg.SetLimit(4) for i := range len(pp.dumpers) { dumper := pp.dumpers[i] eg.Go(func() error { @@ -958,14 +962,15 @@ func NewParquetParser( }) parser := &ParquetParser{ - readers: subreaders, - colMetas: columnMetas, - columnNames: columnNames, - alloc: allocator, - logger: log.FromContext(ctx), - memoryUsage: memoryUsage, - memLimiter: readerMemoryLimiter, - rowPool: &pool, + readers: subreaders, + colMetas: columnMetas, + columnNames: columnNames, + alloc: allocator, + logger: log.FromContext(ctx), + memoryUsage: memoryUsage, + memLimiter: readerMemoryLimiter, + rowPool: &pool, + parallelRead: !strings.HasPrefix(store.URI(), storage.LocalURIPrefix) && meta.UseStreaming, } if err := parser.Init(); err != nil { return nil, errors.Trace(err) @@ -1089,7 +1094,7 @@ func SampleStatisticsFromParquet( pageBufferFull = max(pageBufferFull, totalUsage) } - // Do some precheck, to prevent OOM during estimate memory usage. + // Do some precheck, to prevent OOM during estimate memory usage due to large row group. 
memoryUsageFull = roundUp(memoryUsageFull+pageBufferFull, defaultArenaSize) if memoryUsageFull < (6 << 30) { memoryUsageFull, err = estimateNonStreamMemory(ctx, fileMeta, store) From af1b47fbb49cd583e8d8a3ee3b568bef522e5d84 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 13 Mar 2025 18:25:40 +0800 Subject: [PATCH 75/93] add test --- br/pkg/storage/gcs.go | 41 ++++++++++++++++++++++++++++++++++--- br/pkg/storage/ks3.go | 6 +++--- br/pkg/storage/s3.go | 6 +++--- go.sum | 2 -- pkg/util/prefetch/reader.go | 25 +++++++++++++++++++++- 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/br/pkg/storage/gcs.go b/br/pkg/storage/gcs.go index 6f31843bf6157..d21c083bbfcbe 100644 --- a/br/pkg/storage/gcs.go +++ b/br/pkg/storage/gcs.go @@ -10,6 +10,7 @@ import ( "io" "os" "path" + "runtime" "strings" "cloud.google.com/go/storage" @@ -479,6 +480,38 @@ func (s *GCSStorage) Reset(ctx context.Context) error { return nil } +func printCallStack() string { + var ( + pcs = make([]uintptr, 32) // 存储程序计数器的数组 + n = runtime.Callers(2, pcs[:]) // 跳过两层(当前函数和调用printCallStack的位置) + frames = runtime.CallersFrames(pcs[:n]) + callers []string + ) + + // 遍历调用栈帧 + for { + frame, more := frames.Next() + funcName := fmt.Sprintf("%s (%s:%d)", frame.Function, frame.File, frame.Line) + + // 提取短函数名(例如:main.foo -> foo) + // if idx := strings.LastIndex(funcName, "."); idx != -1 { + // funcName = funcName[idx+1:] + // } + + callers = append(callers, funcName) + if !more { + break + } + } + + // 反转顺序以显示正确调用链 + for i, j := 0, len(callers)-1; i < j; i, j = i+1, j-1 { + callers[i], callers[j] = callers[j], callers[i] + } + + return strings.Join(callers, " -> ") +} + func shouldRetry(err error) bool { if storage.ShouldRetry(err) { return true @@ -522,7 +555,9 @@ func shouldRetry(err error) bool { if !goerrors.Is(err, context.Canceled) { log.Warn("other error when requesting gcs", zap.Error(err), - zap.String("info", fmt.Sprintf("type: %T, value: %#v", err, err))) + zap.String("info", 
fmt.Sprintf("type: %T, value: %#v", err, err)), + zap.String("call_stack", printCallStack()), + ) } return false @@ -560,7 +595,7 @@ func (r *gcsObjectReader) Read(p []byte) (n int, err error) { } r.reader = rc if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } } n, err = r.reader.Read(p) @@ -620,7 +655,7 @@ func (r *gcsObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = rc if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } return realOffset, nil diff --git a/br/pkg/storage/ks3.go b/br/pkg/storage/ks3.go index 3d4977ef39bd7..12bb9f43dd66f 100644 --- a/br/pkg/storage/ks3.go +++ b/br/pkg/storage/ks3.go @@ -443,7 +443,7 @@ func (rs *KS3Storage) Open(ctx context.Context, path string, o *ReaderOption) (E return nil, errors.Trace(err) } if prefetchSize > 0 { - reader = prefetch.NewReader(reader, prefetchSize) + reader = prefetch.NewReader(reader, "", prefetchSize) } return &ks3ObjectReader{ ctx: ctx, @@ -571,7 +571,7 @@ func (r *ks3ObjectReader) Read(p []byte) (n int, err error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } retryCnt++ n, err = r.reader.Read(p[:maxCnt]) @@ -643,7 +643,7 @@ func (r *ks3ObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } r.rangeInfo = info r.pos = realOffset diff --git a/br/pkg/storage/s3.go b/br/pkg/storage/s3.go index 10f969dc09895..e55db00ef9c52 100644 --- a/br/pkg/storage/s3.go +++ b/br/pkg/storage/s3.go @@ -822,7 +822,7 @@ func (rs *S3Storage) Open(ctx context.Context, path string, o *ReaderOption) (Ex return 
nil, errors.Trace(err) } if prefetchSize > 0 { - reader = prefetch.NewReader(reader, o.PrefetchSize) + reader = prefetch.NewReader(reader, path, o.PrefetchSize) } return &s3ObjectReader{ storage: rs, @@ -998,7 +998,7 @@ func (r *s3ObjectReader) Read(p []byte) (n int, err error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } retryCnt++ n, err = r.reader.Read(p[:maxCnt]) @@ -1070,7 +1070,7 @@ func (r *s3ObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) } r.rangeInfo = info r.pos = realOffset diff --git a/go.sum b/go.sum index 124665d2e70bd..78e99228d01cb 100644 --- a/go.sum +++ b/go.sum @@ -516,8 +516,6 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 h1:8QBwC5DOnNBqsXPpeGqD79FcYNTqVR6wDeczNpHLBpA= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 h1:3Ec2rNvZT3b5HUlKi1aCCDu11sn7swFiqyjdpBrSe7c= github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= diff --git a/pkg/util/prefetch/reader.go b/pkg/util/prefetch/reader.go index 
863ce942f78bb..463b6c8859bd2 100644 --- a/pkg/util/prefetch/reader.go +++ b/pkg/util/prefetch/reader.go @@ -19,11 +19,15 @@ import ( "errors" "io" "sync" + + "github.com/pingcap/log" + "go.uber.org/zap" ) // Reader is a reader that prefetches data from the underlying reader. type Reader struct { r io.ReadCloser + path string curBufReader *bytes.Reader buf [2][]byte bufIdx int @@ -36,9 +40,10 @@ type Reader struct { } // NewReader creates a new Reader. -func NewReader(r io.ReadCloser, prefetchSize int) io.ReadCloser { +func NewReader(r io.ReadCloser, path string, prefetchSize int) io.ReadCloser { ret := &Reader{ r: r, + path: path, bufCh: make(chan []byte), err: nil, closedCh: make(chan struct{}), @@ -63,12 +68,19 @@ func (r *Reader) run() { case r.bufCh <- buf: } if err != nil { + if !errors.Is(err, io.EOF) { + log.Info("get other error during prefetch", + zap.Error(err), zap.Int("bytes", n), zap.Int("cap", cap(buf)), zap.String("path", r.path)) + } if errors.Is(err, io.ErrUnexpectedEOF) { // this is caused by io.ReadFull. Because we are prefetching, the buffer size may // be larger that caller's need. So we return io.EOF instead. Let caller check // its needed size to convert io.EOF to io.ErrUnexpectedEOF. 
+ log.Info("get other error during prefetch, not unexpected eof", + zap.Error(err), zap.Int("bytes", n), zap.Int("cap", cap(buf)), zap.String("path", r.path)) err = io.EOF } + r.err = err close(r.bufCh) return @@ -84,6 +96,13 @@ func (r *Reader) Read(data []byte) (int, error) { b, ok := <-r.bufCh if !ok { if total > 0 { + if r.err != nil && !errors.Is(r.err, io.EOF) { + log.Info("read total > 0 but has internal error", + zap.Error(r.err), zap.Int("bytes", total), + zap.Int("expected", len(data)), + zap.String("path", r.path), + ) + } return total, nil } return 0, r.err @@ -96,6 +115,10 @@ func (r *Reader) Read(data []byte) (int, error) { n, err := r.curBufReader.Read(data) total += n if n == expected { + if r.err != nil && !errors.Is(r.err, io.EOF) { + log.Info("read suceess but has internal error", + zap.Error(r.err), zap.Int("bytes", n), zap.String("path", r.path)) + } return total, nil } From 0850431d9e57ad65801c53b305d4cf964c7b0499 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 26 May 2025 13:37:46 +0800 Subject: [PATCH 76/93] Revert "add test" This reverts commit af1b47fbb49cd583e8d8a3ee3b568bef522e5d84. 
--- br/pkg/storage/gcs.go | 41 +++---------------------------------- br/pkg/storage/ks3.go | 6 +++--- br/pkg/storage/s3.go | 6 +++--- go.sum | 2 ++ pkg/util/prefetch/reader.go | 25 +--------------------- 5 files changed, 12 insertions(+), 68 deletions(-) diff --git a/br/pkg/storage/gcs.go b/br/pkg/storage/gcs.go index d21c083bbfcbe..6f31843bf6157 100644 --- a/br/pkg/storage/gcs.go +++ b/br/pkg/storage/gcs.go @@ -10,7 +10,6 @@ import ( "io" "os" "path" - "runtime" "strings" "cloud.google.com/go/storage" @@ -480,38 +479,6 @@ func (s *GCSStorage) Reset(ctx context.Context) error { return nil } -func printCallStack() string { - var ( - pcs = make([]uintptr, 32) // 存储程序计数器的数组 - n = runtime.Callers(2, pcs[:]) // 跳过两层(当前函数和调用printCallStack的位置) - frames = runtime.CallersFrames(pcs[:n]) - callers []string - ) - - // 遍历调用栈帧 - for { - frame, more := frames.Next() - funcName := fmt.Sprintf("%s (%s:%d)", frame.Function, frame.File, frame.Line) - - // 提取短函数名(例如:main.foo -> foo) - // if idx := strings.LastIndex(funcName, "."); idx != -1 { - // funcName = funcName[idx+1:] - // } - - callers = append(callers, funcName) - if !more { - break - } - } - - // 反转顺序以显示正确调用链 - for i, j := 0, len(callers)-1; i < j; i, j = i+1, j-1 { - callers[i], callers[j] = callers[j], callers[i] - } - - return strings.Join(callers, " -> ") -} - func shouldRetry(err error) bool { if storage.ShouldRetry(err) { return true @@ -555,9 +522,7 @@ func shouldRetry(err error) bool { if !goerrors.Is(err, context.Canceled) { log.Warn("other error when requesting gcs", zap.Error(err), - zap.String("info", fmt.Sprintf("type: %T, value: %#v", err, err)), - zap.String("call_stack", printCallStack()), - ) + zap.String("info", fmt.Sprintf("type: %T, value: %#v", err, err))) } return false @@ -595,7 +560,7 @@ func (r *gcsObjectReader) Read(p []byte) (n int, err error) { } r.reader = rc if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, 
r.prefetchSize) } } n, err = r.reader.Read(p) @@ -655,7 +620,7 @@ func (r *gcsObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = rc if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.prefetchSize) } return realOffset, nil diff --git a/br/pkg/storage/ks3.go b/br/pkg/storage/ks3.go index 12bb9f43dd66f..3d4977ef39bd7 100644 --- a/br/pkg/storage/ks3.go +++ b/br/pkg/storage/ks3.go @@ -443,7 +443,7 @@ func (rs *KS3Storage) Open(ctx context.Context, path string, o *ReaderOption) (E return nil, errors.Trace(err) } if prefetchSize > 0 { - reader = prefetch.NewReader(reader, "", prefetchSize) + reader = prefetch.NewReader(reader, prefetchSize) } return &ks3ObjectReader{ ctx: ctx, @@ -571,7 +571,7 @@ func (r *ks3ObjectReader) Read(p []byte) (n int, err error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.prefetchSize) } retryCnt++ n, err = r.reader.Read(p[:maxCnt]) @@ -643,7 +643,7 @@ func (r *ks3ObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.prefetchSize) } r.rangeInfo = info r.pos = realOffset diff --git a/br/pkg/storage/s3.go b/br/pkg/storage/s3.go index e55db00ef9c52..10f969dc09895 100644 --- a/br/pkg/storage/s3.go +++ b/br/pkg/storage/s3.go @@ -822,7 +822,7 @@ func (rs *S3Storage) Open(ctx context.Context, path string, o *ReaderOption) (Ex return nil, errors.Trace(err) } if prefetchSize > 0 { - reader = prefetch.NewReader(reader, path, o.PrefetchSize) + reader = prefetch.NewReader(reader, o.PrefetchSize) } return &s3ObjectReader{ storage: rs, @@ -998,7 +998,7 @@ func (r *s3ObjectReader) Read(p []byte) (n int, err error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = 
prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.prefetchSize) } retryCnt++ n, err = r.reader.Read(p[:maxCnt]) @@ -1070,7 +1070,7 @@ func (r *s3ObjectReader) Seek(offset int64, whence int) (int64, error) { } r.reader = newReader if r.prefetchSize > 0 { - r.reader = prefetch.NewReader(r.reader, r.name, r.prefetchSize) + r.reader = prefetch.NewReader(r.reader, r.prefetchSize) } r.rangeInfo = info r.pos = realOffset diff --git a/go.sum b/go.sum index 78e99228d01cb..124665d2e70bd 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7 h1:8QBwC5DOnNBqsXPpeGqD79FcYNTqVR6wDeczNpHLBpA= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250215045230-203e420514b7/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 h1:3Ec2rNvZT3b5HUlKi1aCCDu11sn7swFiqyjdpBrSe7c= github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= diff --git a/pkg/util/prefetch/reader.go b/pkg/util/prefetch/reader.go index 463b6c8859bd2..863ce942f78bb 100644 --- a/pkg/util/prefetch/reader.go +++ b/pkg/util/prefetch/reader.go @@ -19,15 +19,11 @@ import ( "errors" "io" "sync" - - "github.com/pingcap/log" - "go.uber.org/zap" ) // Reader is a reader that prefetches data from the underlying reader. 
type Reader struct { r io.ReadCloser - path string curBufReader *bytes.Reader buf [2][]byte bufIdx int @@ -40,10 +36,9 @@ type Reader struct { } // NewReader creates a new Reader. -func NewReader(r io.ReadCloser, path string, prefetchSize int) io.ReadCloser { +func NewReader(r io.ReadCloser, prefetchSize int) io.ReadCloser { ret := &Reader{ r: r, - path: path, bufCh: make(chan []byte), err: nil, closedCh: make(chan struct{}), @@ -68,19 +63,12 @@ func (r *Reader) run() { case r.bufCh <- buf: } if err != nil { - if !errors.Is(err, io.EOF) { - log.Info("get other error during prefetch", - zap.Error(err), zap.Int("bytes", n), zap.Int("cap", cap(buf)), zap.String("path", r.path)) - } if errors.Is(err, io.ErrUnexpectedEOF) { // this is caused by io.ReadFull. Because we are prefetching, the buffer size may // be larger that caller's need. So we return io.EOF instead. Let caller check // its needed size to convert io.EOF to io.ErrUnexpectedEOF. - log.Info("get other error during prefetch, not unexpected eof", - zap.Error(err), zap.Int("bytes", n), zap.Int("cap", cap(buf)), zap.String("path", r.path)) err = io.EOF } - r.err = err close(r.bufCh) return @@ -96,13 +84,6 @@ func (r *Reader) Read(data []byte) (int, error) { b, ok := <-r.bufCh if !ok { if total > 0 { - if r.err != nil && !errors.Is(r.err, io.EOF) { - log.Info("read total > 0 but has internal error", - zap.Error(r.err), zap.Int("bytes", total), - zap.Int("expected", len(data)), - zap.String("path", r.path), - ) - } return total, nil } return 0, r.err @@ -115,10 +96,6 @@ func (r *Reader) Read(data []byte) (int, error) { n, err := r.curBufReader.Read(data) total += n if n == expected { - if r.err != nil && !errors.Is(r.err, io.EOF) { - log.Info("read suceess but has internal error", - zap.Error(r.err), zap.Int("bytes", n), zap.String("path", r.path)) - } return total, nil } From 3781ec752d5d0236c15a8a9a38bff6fc885ebb5b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 26 May 2025 13:37:51 +0800 Subject: [PATCH 
77/93] Revert "[test] skip preprocess for test" This reverts commit 6fb09b8cdb780dfad8a2c701dc765311ca8cbadf. --- pkg/lightning/mydump/region.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pkg/lightning/mydump/region.go b/pkg/lightning/mydump/region.go index 5dec4e81c90d7..27ac1230ab105 100644 --- a/pkg/lightning/mydump/region.go +++ b/pkg/lightning/mydump/region.go @@ -362,20 +362,18 @@ func MakeSourceFileRegion( // because parquet files can't seek efficiently, there is no benefit in split. // parquet file are column orient, so the offset is read line number func makeParquetFileRegion( - _ context.Context, + ctx context.Context, cfg *DataDivideConfig, dataFile FileInfo, ) ([]*TableRegion, []float64, error) { numberRows := dataFile.FileMeta.ParquetMeta.Rows - // var err error + var err error // for safety if numberRows <= 0 { - // TODO(joechenrh): only for test - numberRows = 1600000 - // numberRows, err = ReadParquetFileRowCountByFile(ctx, cfg.Store, dataFile.FileMeta) - // if err != nil { - // return nil, nil, err - // } + numberRows, err = ReadParquetFileRowCountByFile(ctx, cfg.Store, dataFile.FileMeta) + if err != nil { + return nil, nil, err + } } region := &TableRegion{ DB: cfg.TableMeta.DB, From 3b5fd47a34675de14214243f0b9f0a94a0044140 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 26 May 2025 14:59:50 +0800 Subject: [PATCH 78/93] update DEPS.bzl --- DEPS.bzl | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index fb7a98ba9068c..b1a95c5e0d022 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1499,6 +1499,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/creack/pty/com_github_creack_pty-v1.1.11.zip", ], ) + go_repository( + name = "com_github_creasty_defaults", + build_file_proto_mode = "disable_global", + importpath = "github.com/creasty/defaults", + sha256 = 
"d9984bcd4b7326a6066f58bc94b46fe8657e50e1ba0a3ef6eb592b0ff96e6712", + strip_prefix = "github.com/creasty/defaults@v1.8.0", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "http://ats.apps.svc/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/creasty/defaults/com_github_creasty_defaults-v1.8.0.zip", + ], + ) go_repository( name = "com_github_curioswitch_go_reassign", build_file_proto_mode = "disable_global", @@ -3024,26 +3037,26 @@ def go_deps(): name = "com_github_golang_jwt_jwt_v4", build_file_proto_mode = "disable_global", importpath = "github.com/golang-jwt/jwt/v4", - sha256 = "ec5ee69a31fd478fc197fddce7c06dad1abe7543095a55c4ee6546ae79d99a0f", - strip_prefix = "github.com/golang-jwt/jwt/v4@v4.5.2", + sha256 = "a05e4849f6b52d84154e9bc37fca7f340bb85d9cce2ce180a09ae70758f6890c", + strip_prefix = "github.com/golang-jwt/jwt/v4@v4.5.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", - "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", + "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", ], ) go_repository( name = "com_github_golang_jwt_jwt_v5", build_file_proto_mode = "disable_global", importpath = "github.com/golang-jwt/jwt/v5", - sha256 = "278980d9e52498b7c54baf21fed203b942aa1d08b7f62eec494110b61b6fd3c9", - strip_prefix = "github.com/golang-jwt/jwt/v5@v5.2.2", + sha256 = "ad5cdc5c6bac562a2b890e96347208ffdb30a940243b558465ab7de90913a180", + strip_prefix = "github.com/golang-jwt/jwt/v5@v5.2.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", - "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", + "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", ], ) go_repository( @@ -4303,13 +4316,13 @@ def go_deps(): name = "com_github_joechenrh_arrow_go_v18", build_file_proto_mode = "disable_global", importpath = "github.com/joechenrh/arrow-go/v18", - sha256 = "f0cfa403295cb81867af7282c5593654c2c659751460dc5d183560528c479fde", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250215045230-203e420514b7", + sha256 = "801a70a732e926caee0cf27b99c95267e6fa7d99deec1e64210d014bd58ab0ae", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250305032250-07d568e83cc0", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250215045230-203e420514b7.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", ], ) go_repository( From 4136e3f6b5ec91503d10cd4770eeb083572e8c4b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 27 May 2025 10:00:17 +0800 Subject: [PATCH 79/93] totally remove xitongsys/parquet-go --- DEPS.bzl | 595 +------------------ br/pkg/storage/gcs.go | 2 - br/pkg/storage/s3.go | 2 - go.mod | 4 - go.sum | 171 ------ lightning/pkg/importer/BUILD.bazel | 2 - lightning/pkg/importer/get_pre_info_test.go | 41 +- lightning/pkg/importer/table_import.go | 1 - lightning/pkg/importer/testdata/test.parquet | Bin 0 -> 572 bytes pkg/lightning/mydump/BUILD.bazel | 9 +- pkg/lightning/mydump/allocator.go | 21 +- pkg/lightning/mydump/loader.go | 15 +- pkg/lightning/mydump/loader_test.go | 95 +-- pkg/lightning/mydump/parquet_parser.go | 7 +- 
pkg/lightning/mydump/parquet_parser_test.go | 237 +++++--- pkg/lightning/mydump/parquet_writer.go | 153 +++++ tools/gen-parquet/BUILD.bazel | 6 +- tools/gen-parquet/main.go | 139 ++++- 18 files changed, 538 insertions(+), 962 deletions(-) create mode 100644 lightning/pkg/importer/testdata/test.parquet create mode 100644 pkg/lightning/mydump/parquet_writer.go diff --git a/DEPS.bzl b/DEPS.bzl index b1a95c5e0d022..489d5ad595347 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -121,45 +121,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/aclements/go-moremath/com_github_aclements_go_moremath-v0.0.0-20210112150236-f10218a38794.zip", ], ) - go_repository( - name = "com_github_ajstarks_deck", - build_file_proto_mode = "disable_global", - importpath = "github.com/ajstarks/deck", - sha256 = "68bad2e38bf5b01e6bbd7b9bbdba35da94dac72bc4ba41f8ea5fe92aa836a3c3", - strip_prefix = "github.com/ajstarks/deck@v0.0.0-20200831202436-30c9fc6549a9", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/ajstarks/deck/com_github_ajstarks_deck-v0.0.0-20200831202436-30c9fc6549a9.zip", - "http://ats.apps.svc/gomod/github.com/ajstarks/deck/com_github_ajstarks_deck-v0.0.0-20200831202436-30c9fc6549a9.zip", - "https://cache.hawkingrei.com/gomod/github.com/ajstarks/deck/com_github_ajstarks_deck-v0.0.0-20200831202436-30c9fc6549a9.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/ajstarks/deck/com_github_ajstarks_deck-v0.0.0-20200831202436-30c9fc6549a9.zip", - ], - ) - go_repository( - name = "com_github_ajstarks_deck_generate", - build_file_proto_mode = "disable_global", - importpath = "github.com/ajstarks/deck/generate", - sha256 = "dce1cbc4cb42ac26512dd0bccf997baeea99fb4595cd419a28e8566d2d7c7ba8", - strip_prefix = "github.com/ajstarks/deck/generate@v0.0.0-20210309230005-c3f852c02e19", - urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/ajstarks/deck/generate/com_github_ajstarks_deck_generate-v0.0.0-20210309230005-c3f852c02e19.zip", - "http://ats.apps.svc/gomod/github.com/ajstarks/deck/generate/com_github_ajstarks_deck_generate-v0.0.0-20210309230005-c3f852c02e19.zip", - "https://cache.hawkingrei.com/gomod/github.com/ajstarks/deck/generate/com_github_ajstarks_deck_generate-v0.0.0-20210309230005-c3f852c02e19.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/ajstarks/deck/generate/com_github_ajstarks_deck_generate-v0.0.0-20210309230005-c3f852c02e19.zip", - ], - ) - go_repository( - name = "com_github_ajstarks_svgo", - build_file_proto_mode = "disable_global", - importpath = "github.com/ajstarks/svgo", - sha256 = "e25b5dbb6cc86d2a0b5db08aad757c534681c2cecb30d84746e09c661cbd7c6f", - strip_prefix = "github.com/ajstarks/svgo@v0.0.0-20211024235047-1546f124cd8b", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/ajstarks/svgo/com_github_ajstarks_svgo-v0.0.0-20211024235047-1546f124cd8b.zip", - "http://ats.apps.svc/gomod/github.com/ajstarks/svgo/com_github_ajstarks_svgo-v0.0.0-20211024235047-1546f124cd8b.zip", - "https://cache.hawkingrei.com/gomod/github.com/ajstarks/svgo/com_github_ajstarks_svgo-v0.0.0-20211024235047-1546f124cd8b.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/ajstarks/svgo/com_github_ajstarks_svgo-v0.0.0-20211024235047-1546f124cd8b.zip", - ], - ) go_repository( name = "com_github_alecthomas_chroma_v2", build_file_proto_mode = "disable_global", @@ -368,19 +329,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/Antonboom/testifylint/com_github_antonboom_testifylint-v1.6.1.zip", ], ) - go_repository( - name = "com_github_apache_arrow_go_v12", - build_file_proto_mode = "disable_global", - importpath = "github.com/apache/arrow/go/v12", - sha256 = "5eb05ed9c2c5e164503b00912b7b2456400578de29e7e8a8956a41acd861ab5b", - strip_prefix = 
"github.com/apache/arrow/go/v12@v12.0.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/apache/arrow/go/v12/com_github_apache_arrow_go_v12-v12.0.1.zip", - "http://ats.apps.svc/gomod/github.com/apache/arrow/go/v12/com_github_apache_arrow_go_v12-v12.0.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/apache/arrow/go/v12/com_github_apache_arrow_go_v12-v12.0.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/apache/arrow/go/v12/com_github_apache_arrow_go_v12-v12.0.1.zip", - ], - ) go_repository( name = "com_github_apache_skywalking_eyes", build_file_proto_mode = "disable_global", @@ -771,19 +719,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/bombsimon/wsl/v4/com_github_bombsimon_wsl_v4-v4.7.0.zip", ], ) - go_repository( - name = "com_github_boombuler_barcode", - build_file_proto_mode = "disable_global", - importpath = "github.com/boombuler/barcode", - sha256 = "812c5beeaa87864227f9d92a9ae71792dc0e6302a33737a91aabe1e511cde42b", - strip_prefix = "github.com/boombuler/barcode@v1.0.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "http://ats.apps.svc/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - ], - ) go_repository( name = "com_github_breml_bidichk", build_file_proto_mode = "disable_global", @@ -1191,13 +1126,13 @@ def go_deps(): name = "com_github_cncf_udpa_go", build_file_proto_mode = "disable_global", importpath = "github.com/cncf/udpa/go", - sha256 = "a449fa94e58117a79c17577e39f72f695c4876f74cbd9142d512278192ca90aa", - strip_prefix = "github.com/cncf/udpa/go@v0.0.0-20210930031921-04548b0d99d4", + sha256 = 
"f2a2fee0b2024946ddd3b7ec5cd06a6d318cdb8421a8d5afff4c2fd69f1e74a7", + strip_prefix = "github.com/cncf/udpa/go@v0.0.0-20191209042840-269d4d468f6f", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20210930031921-04548b0d99d4.zip", - "http://ats.apps.svc/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20210930031921-04548b0d99d4.zip", - "https://cache.hawkingrei.com/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20210930031921-04548b0d99d4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20210930031921-04548b0d99d4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20191209042840-269d4d468f6f.zip", + "http://ats.apps.svc/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20191209042840-269d4d468f6f.zip", + "https://cache.hawkingrei.com/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20191209042840-269d4d468f6f.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/udpa/go/com_github_cncf_udpa_go-v0.0.0-20191209042840-269d4d468f6f.zip", ], ) go_repository( @@ -1330,19 +1265,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/codegangsta/inject/com_github_codegangsta_inject-v0.0.0-20150114235600-33e0aa1cb7c0.zip", ], ) - go_repository( - name = "com_github_colinmarc_hdfs_v2", - build_file_proto_mode = "disable_global", - importpath = "github.com/colinmarc/hdfs/v2", - sha256 = "6a40084f999e3ddbd9a8566b1333646424201fc2ad28aa1a40ddf51aaf8fbc51", - strip_prefix = "github.com/colinmarc/hdfs/v2@v2.1.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/colinmarc/hdfs/v2/com_github_colinmarc_hdfs_v2-v2.1.1.zip", - "http://ats.apps.svc/gomod/github.com/colinmarc/hdfs/v2/com_github_colinmarc_hdfs_v2-v2.1.1.zip", - 
"https://cache.hawkingrei.com/gomod/github.com/colinmarc/hdfs/v2/com_github_colinmarc_hdfs_v2-v2.1.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/colinmarc/hdfs/v2/com_github_colinmarc_hdfs_v2-v2.1.1.zip", - ], - ) go_repository( name = "com_github_containerd_cgroups_v3", build_file_proto_mode = "disable_global", @@ -2110,19 +2032,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/flosch/pongo2/v4/com_github_flosch_pongo2_v4-v4.0.2.zip", ], ) - go_repository( - name = "com_github_fogleman_gg", - build_file_proto_mode = "disable_global", - importpath = "github.com/fogleman/gg", - sha256 = "792f7a3ea9eea31b7947dabaf9d5a307389245069078e4bf435d76cb0505439c", - strip_prefix = "github.com/fogleman/gg@v1.3.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/fogleman/gg/com_github_fogleman_gg-v1.3.0.zip", - "http://ats.apps.svc/gomod/github.com/fogleman/gg/com_github_fogleman_gg-v1.3.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/fogleman/gg/com_github_fogleman_gg-v1.3.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/fogleman/gg/com_github_fogleman_gg-v1.3.0.zip", - ], - ) go_repository( name = "com_github_frankban_quicktest", build_file_proto_mode = "disable_global", @@ -2305,58 +2214,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-errors/errors/com_github_go_errors_errors-v1.4.2.zip", ], ) - go_repository( - name = "com_github_go_fonts_dejavu", - build_file_proto_mode = "disable_global", - importpath = "github.com/go-fonts/dejavu", - sha256 = "c2094ce49cfc24b7b7a041e54d924e311322b73a8e56db28ff179fcd403b4111", - strip_prefix = "github.com/go-fonts/dejavu@v0.1.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-fonts/dejavu/com_github_go_fonts_dejavu-v0.1.0.zip", - "http://ats.apps.svc/gomod/github.com/go-fonts/dejavu/com_github_go_fonts_dejavu-v0.1.0.zip", - 
"https://cache.hawkingrei.com/gomod/github.com/go-fonts/dejavu/com_github_go_fonts_dejavu-v0.1.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-fonts/dejavu/com_github_go_fonts_dejavu-v0.1.0.zip", - ], - ) - go_repository( - name = "com_github_go_fonts_latin_modern", - build_file_proto_mode = "disable_global", - importpath = "github.com/go-fonts/latin-modern", - sha256 = "037085a80ad108287e772d064d64bb72deb62514de84ef610506bc079f330ec0", - strip_prefix = "github.com/go-fonts/latin-modern@v0.2.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-fonts/latin-modern/com_github_go_fonts_latin_modern-v0.2.0.zip", - "http://ats.apps.svc/gomod/github.com/go-fonts/latin-modern/com_github_go_fonts_latin_modern-v0.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/go-fonts/latin-modern/com_github_go_fonts_latin_modern-v0.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-fonts/latin-modern/com_github_go_fonts_latin_modern-v0.2.0.zip", - ], - ) - go_repository( - name = "com_github_go_fonts_liberation", - build_file_proto_mode = "disable_global", - importpath = "github.com/go-fonts/liberation", - sha256 = "bd7561251c221fe0fd8cd4c361b062a5796f6f3a1096968b8fecdd61eb82d8fe", - strip_prefix = "github.com/go-fonts/liberation@v0.2.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-fonts/liberation/com_github_go_fonts_liberation-v0.2.0.zip", - "http://ats.apps.svc/gomod/github.com/go-fonts/liberation/com_github_go_fonts_liberation-v0.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/go-fonts/liberation/com_github_go_fonts_liberation-v0.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-fonts/liberation/com_github_go_fonts_liberation-v0.2.0.zip", - ], - ) - go_repository( - name = "com_github_go_fonts_stix", - build_file_proto_mode = "disable_global", - importpath = "github.com/go-fonts/stix", - sha256 = 
"51ea5a38b9fda7854af60f280dbd8b40a3e5b5a48eb00d3f8d4e43de3f514ecf", - strip_prefix = "github.com/go-fonts/stix@v0.1.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-fonts/stix/com_github_go_fonts_stix-v0.1.0.zip", - "http://ats.apps.svc/gomod/github.com/go-fonts/stix/com_github_go_fonts_stix-v0.1.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/go-fonts/stix/com_github_go_fonts_stix-v0.1.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-fonts/stix/com_github_go_fonts_stix-v0.1.0.zip", - ], - ) go_repository( name = "com_github_go_gl_glfw", build_file_proto_mode = "disable_global", @@ -2409,19 +2266,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-kit/log/com_github_go_kit_log-v0.2.1.zip", ], ) - go_repository( - name = "com_github_go_latex_latex", - build_file_proto_mode = "disable_global", - importpath = "github.com/go-latex/latex", - sha256 = "c58be686b31679ad0a51a5d70e60df92fb4bb50a16727caa58b4a67b33f16509", - strip_prefix = "github.com/go-latex/latex@v0.0.0-20210823091927-c0d11ff05a81", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-latex/latex/com_github_go_latex_latex-v0.0.0-20210823091927-c0d11ff05a81.zip", - "http://ats.apps.svc/gomod/github.com/go-latex/latex/com_github_go_latex_latex-v0.0.0-20210823091927-c0d11ff05a81.zip", - "https://cache.hawkingrei.com/gomod/github.com/go-latex/latex/com_github_go_latex_latex-v0.0.0-20210823091927-c0d11ff05a81.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-latex/latex/com_github_go_latex_latex-v0.0.0-20210823091927-c0d11ff05a81.zip", - ], - ) go_repository( name = "com_github_go_ldap_ldap_v3", build_file_proto_mode = "disable_global", @@ -2617,19 +2461,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-openapi/validate/com_github_go_openapi_validate-v0.22.1.zip", ], ) - go_repository( - name = "com_github_go_pdf_fpdf", - 
build_file_proto_mode = "disable_global", - importpath = "github.com/go-pdf/fpdf", - sha256 = "03a6909fc346ac972b008b77585ac3954d76b416c33b4b64dc22c5f35f0e1edb", - strip_prefix = "github.com/go-pdf/fpdf@v0.6.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/go-pdf/fpdf/com_github_go_pdf_fpdf-v0.6.0.zip", - "http://ats.apps.svc/gomod/github.com/go-pdf/fpdf/com_github_go_pdf_fpdf-v0.6.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/go-pdf/fpdf/com_github_go_pdf_fpdf-v0.6.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/go-pdf/fpdf/com_github_go_pdf_fpdf-v0.6.0.zip", - ], - ) go_repository( name = "com_github_go_playground_locales", build_file_proto_mode = "disable_global", @@ -2877,19 +2708,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-json/com_github_goccy_go_json-v0.10.4.zip", ], ) - go_repository( - name = "com_github_goccy_go_reflect", - build_file_proto_mode = "disable_global", - importpath = "github.com/goccy/go-reflect", - sha256 = "d5d5b55be60c40d1ecfbd13a7e89c3fb5363e8b7cd07e2827f7e987944c41458", - strip_prefix = "github.com/goccy/go-reflect@v1.2.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-reflect/com_github_goccy_go_reflect-v1.2.0.zip", - "http://ats.apps.svc/gomod/github.com/goccy/go-reflect/com_github_goccy_go_reflect-v1.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/goccy/go-reflect/com_github_goccy_go_reflect-v1.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-reflect/com_github_goccy_go_reflect-v1.2.0.zip", - ], - ) go_repository( name = "com_github_goccy_go_yaml", build_file_proto_mode = "disable_global", @@ -2981,19 +2799,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goji/httpauth/com_github_goji_httpauth-v0.0.0-20160601135302-2da839ab0f4d.zip", ], ) - go_repository( - name = "com_github_golang_freetype", - build_file_proto_mode = 
"disable_global", - importpath = "github.com/golang/freetype", - sha256 = "cdcb9e6a14933dcbf167b44dcd5083fc6a2e52c4fae8fb79747c691efeb7d84e", - strip_prefix = "github.com/golang/freetype@v0.0.0-20170609003504-e2365dfdc4a0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang/freetype/com_github_golang_freetype-v0.0.0-20170609003504-e2365dfdc4a0.zip", - "http://ats.apps.svc/gomod/github.com/golang/freetype/com_github_golang_freetype-v0.0.0-20170609003504-e2365dfdc4a0.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang/freetype/com_github_golang_freetype-v0.0.0-20170609003504-e2365dfdc4a0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang/freetype/com_github_golang_freetype-v0.0.0-20170609003504-e2365dfdc4a0.zip", - ], - ) go_repository( name = "com_github_golang_glog", build_file_proto_mode = "disable_global", @@ -3896,19 +3701,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/hashicorp/go-rootcerts/com_github_hashicorp_go_rootcerts-v1.0.2.zip", ], ) - go_repository( - name = "com_github_hashicorp_go_uuid", - build_file_proto_mode = "disable_global", - importpath = "github.com/hashicorp/go-uuid", - sha256 = "4b8a152aba5b6db8093f240f11f6999c4401c01e13458228ee023c0682cc5c1d", - strip_prefix = "github.com/hashicorp/go-uuid@v0.0.0-20180228145832-27454136f036", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/hashicorp/go-uuid/com_github_hashicorp_go_uuid-v0.0.0-20180228145832-27454136f036.zip", - "http://ats.apps.svc/gomod/github.com/hashicorp/go-uuid/com_github_hashicorp_go_uuid-v0.0.0-20180228145832-27454136f036.zip", - "https://cache.hawkingrei.com/gomod/github.com/hashicorp/go-uuid/com_github_hashicorp_go_uuid-v0.0.0-20180228145832-27454136f036.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/hashicorp/go-uuid/com_github_hashicorp_go_uuid-v0.0.0-20180228145832-27454136f036.zip", - ], - ) go_repository( name = 
"com_github_hashicorp_go_version", build_file_proto_mode = "disable_global", @@ -4130,19 +3922,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/iris-contrib/schema/com_github_iris_contrib_schema-v0.0.6.zip", ], ) - go_repository( - name = "com_github_jcmturner_gofork", - build_file_proto_mode = "disable_global", - importpath = "github.com/jcmturner/gofork", - sha256 = "dd93724b2bb93705db6f5c8a86f48802cbb4a831883cb82c4ce5b50f5d7fc854", - strip_prefix = "github.com/jcmturner/gofork@v0.0.0-20180107083740-2aebee971930", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/jcmturner/gofork/com_github_jcmturner_gofork-v0.0.0-20180107083740-2aebee971930.zip", - "http://ats.apps.svc/gomod/github.com/jcmturner/gofork/com_github_jcmturner_gofork-v0.0.0-20180107083740-2aebee971930.zip", - "https://cache.hawkingrei.com/gomod/github.com/jcmturner/gofork/com_github_jcmturner_gofork-v0.0.0-20180107083740-2aebee971930.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/jcmturner/gofork/com_github_jcmturner_gofork-v0.0.0-20180107083740-2aebee971930.zip", - ], - ) go_repository( name = "com_github_jedib0t_go_pretty_v6", build_file_proto_mode = "disable_global", @@ -4468,19 +4247,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/julz/importas/com_github_julz_importas-v0.2.0.zip", ], ) - go_repository( - name = "com_github_jung_kurt_gofpdf", - build_file_proto_mode = "disable_global", - importpath = "github.com/jung-kurt/gofpdf", - sha256 = "f0fa70ade137185bbff2f016831a2a456eaadc8d14bc7bf24f0229211820c078", - strip_prefix = "github.com/jung-kurt/gofpdf@v1.0.3-0.20190309125859-24315acbbda5", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/jung-kurt/gofpdf/com_github_jung_kurt_gofpdf-v1.0.3-0.20190309125859-24315acbbda5.zip", - "http://ats.apps.svc/gomod/github.com/jung-kurt/gofpdf/com_github_jung_kurt_gofpdf-v1.0.3-0.20190309125859-24315acbbda5.zip", - 
"https://cache.hawkingrei.com/gomod/github.com/jung-kurt/gofpdf/com_github_jung_kurt_gofpdf-v1.0.3-0.20190309125859-24315acbbda5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/jung-kurt/gofpdf/com_github_jung_kurt_gofpdf-v1.0.3-0.20190309125859-24315acbbda5.zip", - ], - ) go_repository( name = "com_github_karamaru_alpha_copyloopvar", build_file_proto_mode = "disable_global", @@ -4572,19 +4338,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/kataras/tunnel/com_github_kataras_tunnel-v0.0.4.zip", ], ) - go_repository( - name = "com_github_kballard_go_shellquote", - build_file_proto_mode = "disable_global", - importpath = "github.com/kballard/go-shellquote", - sha256 = "ae4cb7b097dc4eb0c248dff00ed3bbf0f36984c4162ad1d615266084e58bd6cc", - strip_prefix = "github.com/kballard/go-shellquote@v0.0.0-20180428030007-95032a82bc51", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/kballard/go-shellquote/com_github_kballard_go_shellquote-v0.0.0-20180428030007-95032a82bc51.zip", - "http://ats.apps.svc/gomod/github.com/kballard/go-shellquote/com_github_kballard_go_shellquote-v0.0.0-20180428030007-95032a82bc51.zip", - "https://cache.hawkingrei.com/gomod/github.com/kballard/go-shellquote/com_github_kballard_go_shellquote-v0.0.0-20180428030007-95032a82bc51.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/kballard/go-shellquote/com_github_kballard_go_shellquote-v0.0.0-20180428030007-95032a82bc51.zip", - ], - ) go_repository( name = "com_github_kimmachinegun_automemlimit", build_file_proto_mode = "disable_global", @@ -5252,19 +5005,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/mattn/go-runewidth/com_github_mattn_go_runewidth-v0.0.16.zip", ], ) - go_repository( - name = "com_github_mattn_go_sqlite3", - build_file_proto_mode = "disable_global", - importpath = "github.com/mattn/go-sqlite3", - sha256 = 
"0114d2df439ddeb03eef49a4bf2cc8fb69665c0d76494463cafa7d189a16e0f9", - strip_prefix = "github.com/mattn/go-sqlite3@v1.14.15", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/mattn/go-sqlite3/com_github_mattn_go_sqlite3-v1.14.15.zip", - "http://ats.apps.svc/gomod/github.com/mattn/go-sqlite3/com_github_mattn_go_sqlite3-v1.14.15.zip", - "https://cache.hawkingrei.com/gomod/github.com/mattn/go-sqlite3/com_github_mattn_go_sqlite3-v1.14.15.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/mattn/go-sqlite3/com_github_mattn_go_sqlite3-v1.14.15.zip", - ], - ) go_repository( name = "com_github_matttproud_golang_protobuf_extensions", build_file_proto_mode = "disable_global", @@ -5941,19 +5681,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pbnjay/memory/com_github_pbnjay_memory-v0.0.0-20210728143218-7b4eea64cf58.zip", ], ) - go_repository( - name = "com_github_pborman_getopt", - build_file_proto_mode = "disable_global", - importpath = "github.com/pborman/getopt", - sha256 = "2c7e5c93709a3b3302d63f8239679d5b0c33f1dc0e1a18ce8167fb97df09f90a", - strip_prefix = "github.com/pborman/getopt@v0.0.0-20180729010549-6fdd0a2c7117", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/pborman/getopt/com_github_pborman_getopt-v0.0.0-20180729010549-6fdd0a2c7117.zip", - "http://ats.apps.svc/gomod/github.com/pborman/getopt/com_github_pborman_getopt-v0.0.0-20180729010549-6fdd0a2c7117.zip", - "https://cache.hawkingrei.com/gomod/github.com/pborman/getopt/com_github_pborman_getopt-v0.0.0-20180729010549-6fdd0a2c7117.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pborman/getopt/com_github_pborman_getopt-v0.0.0-20180729010549-6fdd0a2c7117.zip", - ], - ) go_repository( name = "com_github_pelletier_go_toml", build_file_proto_mode = "disable_global", @@ -6006,32 +5733,6 @@ def go_deps(): 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/phayes/freeport/com_github_phayes_freeport-v0.0.0-20180830031419-95f893ade6f2.zip", ], ) - go_repository( - name = "com_github_phpdave11_gofpdf", - build_file_proto_mode = "disable_global", - importpath = "github.com/phpdave11/gofpdf", - sha256 = "4db05258f281b40d8a17392fd71648779ea758a9aa506a8d1346ded737ede43f", - strip_prefix = "github.com/phpdave11/gofpdf@v1.4.2", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/phpdave11/gofpdf/com_github_phpdave11_gofpdf-v1.4.2.zip", - "http://ats.apps.svc/gomod/github.com/phpdave11/gofpdf/com_github_phpdave11_gofpdf-v1.4.2.zip", - "https://cache.hawkingrei.com/gomod/github.com/phpdave11/gofpdf/com_github_phpdave11_gofpdf-v1.4.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/phpdave11/gofpdf/com_github_phpdave11_gofpdf-v1.4.2.zip", - ], - ) - go_repository( - name = "com_github_phpdave11_gofpdi", - build_file_proto_mode = "disable_global", - importpath = "github.com/phpdave11/gofpdi", - sha256 = "09b728136cf290f4ee87aa47b60f2f9df2b3f4f64119ff10f12319bc3438b58d", - strip_prefix = "github.com/phpdave11/gofpdi@v1.0.13", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/phpdave11/gofpdi/com_github_phpdave11_gofpdi-v1.0.13.zip", - "http://ats.apps.svc/gomod/github.com/phpdave11/gofpdi/com_github_phpdave11_gofpdi-v1.0.13.zip", - "https://cache.hawkingrei.com/gomod/github.com/phpdave11/gofpdi/com_github_phpdave11_gofpdi-v1.0.13.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/phpdave11/gofpdi/com_github_phpdave11_gofpdi-v1.0.13.zip", - ], - ) go_repository( name = "com_github_pierrec_lz4_v4", build_file_proto_mode = "disable_global", @@ -6591,19 +6292,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/russross/blackfriday/v2/com_github_russross_blackfriday_v2-v2.1.0.zip", ], ) - go_repository( - name = "com_github_ruudk_golang_pdf417", - build_file_proto_mode = 
"disable_global", - importpath = "github.com/ruudk/golang-pdf417", - sha256 = "f0006c0f60789da76c1b3fef73bb63f5581744fbe3ab5973ec718b40c6822f69", - strip_prefix = "github.com/ruudk/golang-pdf417@v0.0.0-20201230142125-a7e3863a1245", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/ruudk/golang-pdf417/com_github_ruudk_golang_pdf417-v0.0.0-20201230142125-a7e3863a1245.zip", - "http://ats.apps.svc/gomod/github.com/ruudk/golang-pdf417/com_github_ruudk_golang_pdf417-v0.0.0-20201230142125-a7e3863a1245.zip", - "https://cache.hawkingrei.com/gomod/github.com/ruudk/golang-pdf417/com_github_ruudk_golang_pdf417-v0.0.0-20201230142125-a7e3863a1245.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/ruudk/golang-pdf417/com_github_ruudk_golang_pdf417-v0.0.0-20201230142125-a7e3863a1245.zip", - ], - ) go_repository( name = "com_github_ryancurrah_gomodguard", build_file_proto_mode = "disable_global", @@ -7750,32 +7438,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/xiang90/probing/com_github_xiang90_probing-v0.0.0-20221125231312-a49e3df8f510.zip", ], ) - go_repository( - name = "com_github_xitongsys_parquet_go", - build_file_proto_mode = "disable_global", - importpath = "github.com/xitongsys/parquet-go", - sha256 = "5b9473cce95cf094d398348fd394002b656ae1363bb5c33c1338fcdcd57e1b33", - strip_prefix = "github.com/xitongsys/parquet-go@v1.6.3-0.20240520233950-75e935fc3e17", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/xitongsys/parquet-go/com_github_xitongsys_parquet_go-v1.6.3-0.20240520233950-75e935fc3e17.zip", - "http://ats.apps.svc/gomod/github.com/xitongsys/parquet-go/com_github_xitongsys_parquet_go-v1.6.3-0.20240520233950-75e935fc3e17.zip", - "https://cache.hawkingrei.com/gomod/github.com/xitongsys/parquet-go/com_github_xitongsys_parquet_go-v1.6.3-0.20240520233950-75e935fc3e17.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/xitongsys/parquet-go/com_github_xitongsys_parquet_go-v1.6.3-0.20240520233950-75e935fc3e17.zip", - ], - ) - go_repository( - name = "com_github_xitongsys_parquet_go_source", - build_file_proto_mode = "disable_global", - importpath = "github.com/xitongsys/parquet-go-source", - sha256 = "9fa786105465c7da0b4d0a3f334b5d284cce486229a0631e5bd962e4dc67cd50", - strip_prefix = "github.com/xitongsys/parquet-go-source@v0.0.0-20200817004010-026bad9b25d0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/xitongsys/parquet-go-source/com_github_xitongsys_parquet_go_source-v0.0.0-20200817004010-026bad9b25d0.zip", - "http://ats.apps.svc/gomod/github.com/xitongsys/parquet-go-source/com_github_xitongsys_parquet_go_source-v0.0.0-20200817004010-026bad9b25d0.zip", - "https://cache.hawkingrei.com/gomod/github.com/xitongsys/parquet-go-source/com_github_xitongsys_parquet_go_source-v0.0.0-20200817004010-026bad9b25d0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/xitongsys/parquet-go-source/com_github_xitongsys_parquet_go_source-v0.0.0-20200817004010-026bad9b25d0.zip", - ], - ) go_repository( name = "com_github_xo_terminfo", build_file_proto_mode = "disable_global", @@ -9492,19 +9154,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.4.zip", ], ) - go_repository( - name = "com_lukechampine_uint128", - build_file_proto_mode = "disable_global", - importpath = "lukechampine.com/uint128", - sha256 = "9ff6e9ad553a69fdb961ab2d92f92cda183ef616a6709c15972c2d4bedf33de5", - strip_prefix = "lukechampine.com/uint128@v1.2.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/lukechampine.com/uint128/com_lukechampine_uint128-v1.2.0.zip", - "http://ats.apps.svc/gomod/lukechampine.com/uint128/com_lukechampine_uint128-v1.2.0.zip", - 
"https://cache.hawkingrei.com/gomod/lukechampine.com/uint128/com_lukechampine_uint128-v1.2.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/lukechampine.com/uint128/com_lukechampine_uint128-v1.2.0.zip", - ], - ) go_repository( name = "com_shuralyov_dmitri_gpu_mtl", build_file_proto_mode = "disable_global", @@ -9557,19 +9206,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/stathat.com/c/consistent/com_stathat_c_consistent-v1.0.0.zip", ], ) - go_repository( - name = "ht_sr_git_~sbinet_gg", - build_file_proto_mode = "disable_global", - importpath = "git.sr.ht/~sbinet/gg", - sha256 = "435103529c4f24aecf7e4550bc816db2482dda4ee0123d337daba99971a8c498", - strip_prefix = "git.sr.ht/~sbinet/gg@v0.3.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/git.sr.ht/~sbinet/gg/ht_sr_git_~sbinet_gg-v0.3.1.zip", - "http://ats.apps.svc/gomod/git.sr.ht/~sbinet/gg/ht_sr_git_~sbinet_gg-v0.3.1.zip", - "https://cache.hawkingrei.com/gomod/git.sr.ht/~sbinet/gg/ht_sr_git_~sbinet_gg-v0.3.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/git.sr.ht/~sbinet/gg/ht_sr_git_~sbinet_gg-v0.3.1.zip", - ], - ) go_repository( name = "in_gopkg_check_v1", build_file_proto_mode = "disable_global", @@ -9622,71 +9258,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/ini.v1/in_gopkg_ini_v1-v1.67.0.zip", ], ) - go_repository( - name = "in_gopkg_jcmturner_aescts_v1", - build_file_proto_mode = "disable_global", - importpath = "gopkg.in/jcmturner/aescts.v1", - sha256 = "8bfd83c7204032fb16946202d5d643bd9a7e618005bd39578f29030a7d51dcf9", - strip_prefix = "gopkg.in/jcmturner/aescts.v1@v1.0.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gopkg.in/jcmturner/aescts.v1/in_gopkg_jcmturner_aescts_v1-v1.0.1.zip", - "http://ats.apps.svc/gomod/gopkg.in/jcmturner/aescts.v1/in_gopkg_jcmturner_aescts_v1-v1.0.1.zip", - "https://cache.hawkingrei.com/gomod/gopkg.in/jcmturner/aescts.v1/in_gopkg_jcmturner_aescts_v1-v1.0.1.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/jcmturner/aescts.v1/in_gopkg_jcmturner_aescts_v1-v1.0.1.zip", - ], - ) - go_repository( - name = "in_gopkg_jcmturner_dnsutils_v1", - build_file_proto_mode = "disable_global", - importpath = "gopkg.in/jcmturner/dnsutils.v1", - sha256 = "4fb8b6a5471cb6dda1d0aabd1e01e4d54cb5ee83c395849916392b19153f5203", - strip_prefix = "gopkg.in/jcmturner/dnsutils.v1@v1.0.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gopkg.in/jcmturner/dnsutils.v1/in_gopkg_jcmturner_dnsutils_v1-v1.0.1.zip", - "http://ats.apps.svc/gomod/gopkg.in/jcmturner/dnsutils.v1/in_gopkg_jcmturner_dnsutils_v1-v1.0.1.zip", - "https://cache.hawkingrei.com/gomod/gopkg.in/jcmturner/dnsutils.v1/in_gopkg_jcmturner_dnsutils_v1-v1.0.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/jcmturner/dnsutils.v1/in_gopkg_jcmturner_dnsutils_v1-v1.0.1.zip", - ], - ) - go_repository( - name = "in_gopkg_jcmturner_goidentity_v3", - build_file_proto_mode = "disable_global", - importpath = "gopkg.in/jcmturner/goidentity.v3", - sha256 = "1be44bee93d9080ce89f40827c57e8a396b7c801e2d19a1f5446a4325afa755e", - strip_prefix = "gopkg.in/jcmturner/goidentity.v3@v3.0.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gopkg.in/jcmturner/goidentity.v3/in_gopkg_jcmturner_goidentity_v3-v3.0.0.zip", - "http://ats.apps.svc/gomod/gopkg.in/jcmturner/goidentity.v3/in_gopkg_jcmturner_goidentity_v3-v3.0.0.zip", - "https://cache.hawkingrei.com/gomod/gopkg.in/jcmturner/goidentity.v3/in_gopkg_jcmturner_goidentity_v3-v3.0.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/jcmturner/goidentity.v3/in_gopkg_jcmturner_goidentity_v3-v3.0.0.zip", - ], - ) - go_repository( - name = "in_gopkg_jcmturner_gokrb5_v7", - build_file_proto_mode = "disable_global", - importpath = "gopkg.in/jcmturner/gokrb5.v7", - sha256 = "f7e772eaadb923044924cb86b7a6ed34a3386df831705bb62b6a47dc0819a94b", - strip_prefix = "gopkg.in/jcmturner/gokrb5.v7@v7.3.0", - urls = 
[ - "http://bazel-cache.pingcap.net:8080/gomod/gopkg.in/jcmturner/gokrb5.v7/in_gopkg_jcmturner_gokrb5_v7-v7.3.0.zip", - "http://ats.apps.svc/gomod/gopkg.in/jcmturner/gokrb5.v7/in_gopkg_jcmturner_gokrb5_v7-v7.3.0.zip", - "https://cache.hawkingrei.com/gomod/gopkg.in/jcmturner/gokrb5.v7/in_gopkg_jcmturner_gokrb5_v7-v7.3.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/jcmturner/gokrb5.v7/in_gopkg_jcmturner_gokrb5_v7-v7.3.0.zip", - ], - ) - go_repository( - name = "in_gopkg_jcmturner_rpc_v1", - build_file_proto_mode = "disable_global", - importpath = "gopkg.in/jcmturner/rpc.v1", - sha256 = "83d897b60ecb5a66d25232b775ed04c182ca8e02431f351b3768d4d2876d07ae", - strip_prefix = "gopkg.in/jcmturner/rpc.v1@v1.1.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gopkg.in/jcmturner/rpc.v1/in_gopkg_jcmturner_rpc_v1-v1.1.0.zip", - "http://ats.apps.svc/gomod/gopkg.in/jcmturner/rpc.v1/in_gopkg_jcmturner_rpc_v1-v1.1.0.zip", - "https://cache.hawkingrei.com/gomod/gopkg.in/jcmturner/rpc.v1/in_gopkg_jcmturner_rpc_v1-v1.1.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gopkg.in/jcmturner/rpc.v1/in_gopkg_jcmturner_rpc_v1-v1.1.0.zip", - ], - ) go_repository( name = "in_gopkg_mgo_v2", build_file_proto_mode = "disable_global", @@ -10272,19 +9843,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/rsc.io/binaryregexp/io_rsc_binaryregexp-v0.2.0.zip", ], ) - go_repository( - name = "io_rsc_pdf", - build_file_proto_mode = "disable_global", - importpath = "rsc.io/pdf", - sha256 = "79bf310e399cf0e2d8aa61536750d2a6999c5ca884e7a27faf88d3701cd5ba8f", - strip_prefix = "rsc.io/pdf@v0.1.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/rsc.io/pdf/io_rsc_pdf-v0.1.1.zip", - "http://ats.apps.svc/gomod/rsc.io/pdf/io_rsc_pdf-v0.1.1.zip", - "https://cache.hawkingrei.com/gomod/rsc.io/pdf/io_rsc_pdf-v0.1.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/rsc.io/pdf/io_rsc_pdf-v0.1.1.zip", - ], - ) go_repository( name = 
"io_rsc_quote_v3", build_file_proto_mode = "disable_global", @@ -10324,19 +9882,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/go.starlark.net/net_starlark_go-v0.0.0-20210223155950-e043a3d3c984.zip", ], ) - go_repository( - name = "org_gioui", - build_file_proto_mode = "disable_global", - importpath = "gioui.org", - sha256 = "fcbab2a0ea09ff775c1ff4fa99299d95b94aad496b1ac329e3c7389119168fc0", - strip_prefix = "gioui.org@v0.0.0-20210308172011-57750fc8a0a6", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "http://ats.apps.svc/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "https://cache.hawkingrei.com/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - ], - ) go_repository( name = "org_go_simpler_musttag", build_file_proto_mode = "disable_global", @@ -10523,13 +10068,13 @@ def go_deps(): name = "org_golang_x_image", build_file_proto_mode = "disable_global", importpath = "golang.org/x/image", - sha256 = "56176a4d4d47910d61df9a77aa66a8469ae79fa18b7f5821c43bef1ef212116d", - strip_prefix = "golang.org/x/image@v0.0.0-20220302094943-723b81ca9867", + sha256 = "4a44b498934a95e8f84e8374530de0cab38d81fcd558898d4880c3c5ce1efe47", + strip_prefix = "golang.org/x/image@v0.0.0-20190802002840-cff245a6509b", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20220302094943-723b81ca9867.zip", - "http://ats.apps.svc/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20220302094943-723b81ca9867.zip", - "https://cache.hawkingrei.com/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20220302094943-723b81ca9867.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20220302094943-723b81ca9867.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20190802002840-cff245a6509b.zip", + "http://ats.apps.svc/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20190802002840-cff245a6509b.zip", + "https://cache.hawkingrei.com/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20190802002840-cff245a6509b.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/golang.org/x/image/org_golang_x_image-v0.0.0-20190802002840-cff245a6509b.zip", ], ) go_repository( @@ -10731,65 +10276,13 @@ def go_deps(): name = "org_gonum_v1_netlib", build_file_proto_mode = "disable_global", importpath = "gonum.org/v1/netlib", - sha256 = "eeaeb60f410b86f59d97f15c5ef89096dc72aeb42bae55141738bf9866893938", - strip_prefix = "gonum.org/v1/netlib@v0.0.0-20190313105609-8cb42192e0e0", + sha256 = "35405098d5c2e9ac83607b51a59d9f4e2947166cc7d42aa8fc4bb6dc7be3fb1c", + strip_prefix = "gonum.org/v1/netlib@v0.0.0-20181029234149-ec6d1f5cefe6", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20190313105609-8cb42192e0e0.zip", - "http://ats.apps.svc/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20190313105609-8cb42192e0e0.zip", - "https://cache.hawkingrei.com/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20190313105609-8cb42192e0e0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20190313105609-8cb42192e0e0.zip", - ], - ) - go_repository( - name = "org_gonum_v1_plot", - build_file_proto_mode = "disable_global", - importpath = "gonum.org/v1/plot", - sha256 = "eaa47ad966b3b67325c1f3ae704d566332c573b7cca79016cb4ffe82155aab39", - strip_prefix = "gonum.org/v1/plot@v0.10.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gonum.org/v1/plot/org_gonum_v1_plot-v0.10.1.zip", - "http://ats.apps.svc/gomod/gonum.org/v1/plot/org_gonum_v1_plot-v0.10.1.zip", - "https://cache.hawkingrei.com/gomod/gonum.org/v1/plot/org_gonum_v1_plot-v0.10.1.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/gonum.org/v1/plot/org_gonum_v1_plot-v0.10.1.zip", - ], - ) - go_repository( - name = "org_modernc_cc_v3", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/cc/v3", - sha256 = "fe3aeb761e55ce77a95b297321a122b4273aeffe1c08f48fc99310e065211f74", - strip_prefix = "modernc.org/cc/v3@v3.40.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/cc/v3/org_modernc_cc_v3-v3.40.0.zip", - "http://ats.apps.svc/gomod/modernc.org/cc/v3/org_modernc_cc_v3-v3.40.0.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/cc/v3/org_modernc_cc_v3-v3.40.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/cc/v3/org_modernc_cc_v3-v3.40.0.zip", - ], - ) - go_repository( - name = "org_modernc_ccgo_v3", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/ccgo/v3", - sha256 = "bfc293300cd1ce656ba0ce0cee1f508afec2518bc4214a6b10ccfad6e8e6046e", - strip_prefix = "modernc.org/ccgo/v3@v3.16.13", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/ccgo/v3/org_modernc_ccgo_v3-v3.16.13.zip", - "http://ats.apps.svc/gomod/modernc.org/ccgo/v3/org_modernc_ccgo_v3-v3.16.13.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/ccgo/v3/org_modernc_ccgo_v3-v3.16.13.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/ccgo/v3/org_modernc_ccgo_v3-v3.16.13.zip", - ], - ) - go_repository( - name = "org_modernc_ccorpus", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/ccorpus", - sha256 = "3831b62a73a379b81ac927e17e3e9ffe2d44ad07c934505e1ae24eea8a26a6d3", - strip_prefix = "modernc.org/ccorpus@v1.11.6", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/ccorpus/org_modernc_ccorpus-v1.11.6.zip", - "http://ats.apps.svc/gomod/modernc.org/ccorpus/org_modernc_ccorpus-v1.11.6.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/ccorpus/org_modernc_ccorpus-v1.11.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/ccorpus/org_modernc_ccorpus-v1.11.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20181029234149-ec6d1f5cefe6.zip", + "http://ats.apps.svc/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20181029234149-ec6d1f5cefe6.zip", + "https://cache.hawkingrei.com/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20181029234149-ec6d1f5cefe6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/gonum.org/v1/netlib/org_gonum_v1_netlib-v0.0.0-20181029234149-ec6d1f5cefe6.zip", ], ) go_repository( @@ -10818,19 +10311,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/golex/org_modernc_golex-v1.1.0.zip", ], ) - go_repository( - name = "org_modernc_httpfs", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/httpfs", - sha256 = "0b5314649c1327a199397eb6fd52b3ce41c9d3bc6dd2a4dea565b5fb87c13f41", - strip_prefix = "modernc.org/httpfs@v1.0.6", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/httpfs/org_modernc_httpfs-v1.0.6.zip", - "http://ats.apps.svc/gomod/modernc.org/httpfs/org_modernc_httpfs-v1.0.6.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/httpfs/org_modernc_httpfs-v1.0.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/httpfs/org_modernc_httpfs-v1.0.6.zip", - ], - ) go_repository( name = "org_modernc_libc", build_file_proto_mode = "disable_global", @@ -10870,19 +10350,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/memory/org_modernc_memory-v1.7.2.zip", ], ) - go_repository( - name = "org_modernc_opt", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/opt", - sha256 = "294b1b80137cb86292c8893481d545eee90b17b84b6ad1dcb2e6c9bb523a2d9e", - strip_prefix = "modernc.org/opt@v0.1.3", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/opt/org_modernc_opt-v0.1.3.zip", - 
"http://ats.apps.svc/gomod/modernc.org/opt/org_modernc_opt-v0.1.3.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/opt/org_modernc_opt-v0.1.3.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/opt/org_modernc_opt-v0.1.3.zip", - ], - ) go_repository( name = "org_modernc_parser", build_file_proto_mode = "disable_global", @@ -10935,19 +10402,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/strutil/org_modernc_strutil-v1.2.0.zip", ], ) - go_repository( - name = "org_modernc_tcl", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/tcl", - sha256 = "f966db0dd1ccbc7f8d5ac2e752b64c3be343aa3f92215ed98b6f2a51b7abbb64", - strip_prefix = "modernc.org/tcl@v1.13.2", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/tcl/org_modernc_tcl-v1.13.2.zip", - "http://ats.apps.svc/gomod/modernc.org/tcl/org_modernc_tcl-v1.13.2.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/tcl/org_modernc_tcl-v1.13.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/tcl/org_modernc_tcl-v1.13.2.zip", - ], - ) go_repository( name = "org_modernc_token", build_file_proto_mode = "disable_global", @@ -10974,19 +10428,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/y/org_modernc_y-v1.1.0.zip", ], ) - go_repository( - name = "org_modernc_z", - build_file_proto_mode = "disable_global", - importpath = "modernc.org/z", - sha256 = "5be23ef96669963e52d25b787d71028fff4fe1c468dec20aac59c9512caa2eb7", - strip_prefix = "modernc.org/z@v1.5.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/modernc.org/z/org_modernc_z-v1.5.1.zip", - "http://ats.apps.svc/gomod/modernc.org/z/org_modernc_z-v1.5.1.zip", - "https://cache.hawkingrei.com/gomod/modernc.org/z/org_modernc_z-v1.5.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/modernc.org/z/org_modernc_z-v1.5.1.zip", - ], - ) go_repository( name = "org_mongodb_go_mongo_driver", 
build_file_proto_mode = "disable_global", diff --git a/br/pkg/storage/gcs.go b/br/pkg/storage/gcs.go index 47be9c9c90142..c7fac10a09581 100644 --- a/br/pkg/storage/gcs.go +++ b/br/pkg/storage/gcs.go @@ -586,8 +586,6 @@ type gcsObjectReader struct { prefetchSize int // reader context used for implement `io.Seek` - // currently, lightning depends on package `xitongsys/parquet-go` to read parquet file and it needs `io.Seeker` - // See: https://github.com/xitongsys/parquet-go/blob/207a3cee75900b2b95213627409b7bac0f190bb3/source/source.go#L9-L10 ctx context.Context } diff --git a/br/pkg/storage/s3.go b/br/pkg/storage/s3.go index 11505814c4540..919ef041a2b5d 100644 --- a/br/pkg/storage/s3.go +++ b/br/pkg/storage/s3.go @@ -966,8 +966,6 @@ type s3ObjectReader struct { pos int64 rangeInfo RangeInfo // reader context used for implement `io.Seek` - // currently, lightning depends on package `xitongsys/parquet-go` to read parquet file and it needs `io.Seeker` - // See: https://github.com/xitongsys/parquet-go/blob/207a3cee75900b2b95213627409b7bac0f190bb3/source/source.go#L9-L10 ctx context.Context prefetchSize int } diff --git a/go.mod b/go.mod index df2aea80801c8..1fa673b68d2ef 100644 --- a/go.mod +++ b/go.mod @@ -118,8 +118,6 @@ require ( github.com/uber/jaeger-client-go v2.22.1+incompatible github.com/vbauerster/mpb/v7 v7.5.3 github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f - github.com/xitongsys/parquet-go v1.6.3-0.20240520233950-75e935fc3e17 - github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 github.com/zyedidia/generic v1.2.1 go.etcd.io/etcd/api/v3 v3.5.12 go.etcd.io/etcd/client/pkg/v3 v3.5.12 @@ -159,12 +157,10 @@ require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/andybalholm/brotli v1.1.1 // indirect - github.com/apache/arrow/go/v12 v12.0.1 // indirect github.com/cockroachdb/errors v1.11.3 // indirect github.com/cockroachdb/fifo 
v0.0.0-20240606204812-0bbfbd93a7ce // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/getsentry/sentry-go v0.27.0 // indirect - github.com/goccy/go-reflect v1.2.0 // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect diff --git a/go.sum b/go.sum index 75fb53dbd08df..34f1f4b75c5fe 100644 --- a/go.sum +++ b/go.sum @@ -45,8 +45,6 @@ cloud.google.com/go/storage v1.38.0/go.mod h1:tlUADB0mAb9BgYls9lq+8MGkfzOXuLrnHX dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= -git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.14.0 h1:nyQWyZvwGTvunIMxi1Y9uXkcyr+I7TeNrr/foo4Kpk8= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.14.0/go.mod h1:l38EPgmsp71HHLq9j7De57JcKOWPyhrsW1Awm1JS6K0= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= @@ -89,26 +87,17 @@ github.com/YangKeao/ldap/v3 v3.4.5-0.20230421065457-369a3bab1117 h1:+OqGGFc2YHFd github.com/YangKeao/ldap/v3 v3.4.5-0.20230421065457-369a3bab1117/go.mod h1:bMGIq3AGbytbaMwf8wdv5Phdxz0FWHTIYMSzyrYgnQs= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= -github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= -github.com/ajstarks/deck/generate 
v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= -github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM= github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs= github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 h1:Kk6a4nehpJ3UuJRqlA3JxYxBZEqCeOmATOvrbT4p9RA= github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 h1:Q/yk4z/cHUVZfgTqtD09qeYBxHwshQAjVRX73qs8UH0= github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581/go.mod h1:RcDobYh8k5VP6TNybz9m++gL3ijVI5wueVr0EM10VsU= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg= -github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw= github.com/apache/skywalking-eyes v0.4.0 h1:O13kdRU6FCEZevfD01mdhTgCZLLfPZIQ0GXZrLl7FpQ= github.com/apache/skywalking-eyes v0.4.0/go.mod h1:WblDbBgOLsLN0FJEBa9xj6PhuUA/J6spKYVTG4/F8Ls= -github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= 
github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= @@ -116,7 +105,6 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/ashanbrown/makezero v1.2.0 h1:/2Lp1bypdmK9wDIq7uWBlDF1iMUpIIS4A+pF6C9IEUU= github.com/ashanbrown/makezero v1.2.0/go.mod h1:dxlPhHbDMC6N6xICzFBSK+4njQDdK8euNO0qjQMtGY4= -github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go v1.44.204/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go v1.44.256/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= @@ -137,8 +125,6 @@ github.com/blacktear23/go-proxyprotocol v1.0.6 h1:eTt6UMpEnq59NjON49b3Cay8Dm0sCs github.com/blacktear23/go-proxyprotocol v1.0.6/go.mod h1:FSCbgnRZrQXazBLL5snfBbrcFSMtcmUDhSRb9OfFA1o= github.com/bmatcuk/doublestar/v2 v2.0.4 h1:6I6oUiT/sU27eE2OFcWqBhL1SwjyvQuOssxT4a1yidI= github.com/bmatcuk/doublestar/v2 v2.0.4/go.mod h1:QMmcs3H2AUQICWhfzLXz+IYln8lRQmTZRptLie8RgRw= -github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= -github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/butuzov/mirror v1.3.0 h1:HdWCXzmwlQHdVhwvsfBb2Au0r3HyINry3bDWLYXiKoc= github.com/butuzov/mirror v1.3.0/go.mod h1:AEij0Z8YMALaq4yQj9CPPVYOyJQyiexpQEQgihajRfI= github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5 h1:BjkPE3785EwPhhyuFkbINB+2a1xATwk8SNDWnJiD41g= @@ -169,11 +155,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudfoundry/gosigar v1.3.6 
h1:gIc08FbB3QPb+nAQhINIK/qhf5REKkY0FTGgRGXkcVc= github.com/cloudfoundry/gosigar v1.3.6/go.mod h1:lNWstu5g5gw59O09Y+wsMNFzBSnU8a0u+Sfx4dq360E= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= @@ -188,7 +169,6 @@ github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwP github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= -github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64 h1:W1SHiII3e0jVwvaQFglwu3kS9NLxOeTpvik7MbKCyuQ= github.com/coocood/bbloom v0.0.0-20190830030839-58deb6228d64/go.mod h1:F86k/6c7aDUdwSUevnLpHS/3Q9hzYCE99jGk2xsHnt0= github.com/coocood/freecache v1.2.1 h1:/v1CqMq45NFH9mp/Pt142reundeBM0dVUD3osQBeu/U= @@ 
-228,7 +208,6 @@ github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa5 github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ= github.com/dolthub/maphash v0.1.0/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4= github.com/dolthub/swiss v0.2.1 h1:gs2osYs5SJkAaH5/ggVJqXQxRXtWshF6uE0lgR/Y3Gw= @@ -241,8 +220,6 @@ github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FM github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= @@ -254,8 +231,6 @@ github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNu github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -271,19 +246,12 @@ github.com/go-asn1-ber/asn1-ber v1.5.4 h1:vXT6d/FNDiELJnLb6hGNa309LMsrCoYFvpwHDF github.com/go-asn1-ber/asn1-ber v1.5.4/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= -github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= -github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= -github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= -github.com/go-fonts/liberation v0.2.0/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= -github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= 
-github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= -github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= @@ -295,22 +263,16 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= -github.com/go-pdf/fpdf v0.5.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= -github.com/go-pdf/fpdf v0.6.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= github.com/go-resty/resty/v2 v2.11.0 h1:i7jMfNOJYMp69lq7qozJP+bjgzfAzeOhuGlyDrqxT/8= github.com/go-resty/resty/v2 v2.11.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= -github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM= 
github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/goccy/go-reflect v1.2.0 h1:O0T8rZCuNmGXewnATuKYnkL0xm6o8UNOJZd/gOkb9ms= -github.com/goccy/go-reflect v1.2.0/go.mod h1:n0oYZn8VcV2CkWTxi8B9QjkCoq6GTtCEdfmR66YhFtE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= @@ -321,7 +283,6 @@ github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQg github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= -github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.2.4 h1:CNNw5U8lSiiBk7druxtSHHTsRWcxKoac6kZKm2peBBc= github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= @@ -337,9 +298,7 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf 
v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -358,7 +317,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -376,7 +334,6 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -391,8 +348,6 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -428,7 +383,6 @@ github.com/google/skylark v0.0.0-20181101142754-a5f7082aabed h1:rZdD1GeRTHD1aG+V github.com/google/skylark v0.0.0-20181101142754-a5f7082aabed/go.mod h1:CKSX6SxHW1vp20ZNaeGe3TFFBIwCG6vaYrpAiOzX+NA= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= @@ -470,7 +424,6 @@ github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 h1:/c3QmbOGMGTOumP2iT/rCwB7b0QDGLKzqOmktBjT+Is= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1/go.mod h1:5SN9VR2LTsRFsrEC6FHgRbTWrTHu6tqPeKxEQv15giM= -github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-version v1.2.1/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version 
v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= @@ -491,7 +444,6 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY= github.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y= -github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= github.com/jedib0t/go-pretty/v6 v6.2.2 h1:o3McN0rQ4X+IU+HduppSp9TwRdGLRW2rhJXy9CJaCRw= github.com/jedib0t/go-pretty/v6 v6.2.2/go.mod h1:+nE9fyyHGil+PuISTCrp7avEdo6bqoMwqZnuiK2r2a0= github.com/jellydator/ttlcache/v3 v3.0.1 h1:cHgCSMS7TdQcoprXnWUptJZzyFsqs18Lt8VVhRuZYVU= @@ -511,7 +463,6 @@ github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkr github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= @@ -531,11 +482,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod 
h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/karamaru-alpha/copyloopvar v1.2.1 h1:wmZaZYIjnJ0b5UoKDjUHrikcV0zuPyyxI4SVplLd2CI= github.com/karamaru-alpha/copyloopvar v1.2.1/go.mod h1:nFmMlFNlClC2BPvNaHMdkirmTJxVCY0lhxBtlfOypMM= -github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/errcheck v1.9.0 h1:9xt1zI9EBfcYBvdU1nVrzMzzUPUtPKs9bVSIM3TAb3M= @@ -544,21 +492,15 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod 
h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -591,8 +533,6 @@ github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= @@ -600,7 +540,6 @@ github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRC github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod 
h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mgechev/revive v1.9.0 h1:8LaA62XIKrb8lM6VsBSQ92slt/o92z5+hTw3CmrvSrM= github.com/mgechev/revive v1.9.0/go.mod h1:LAPq3+MgOf7GcL5PlWIkHb0PT7XH4NuC2LdWymhb9Mo= @@ -662,16 +601,11 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9 github.com/otiai10/mint v1.3.1/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= -github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7 h1:Dx7Ovyv/SFnMFw3fD4oEoeorXc6saIiQ23LrGLth0Gw= github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2 h1:JhzVVoYvbOACxoUmOs6V/G4D5nPVUW73rKvXxP4XUJc= github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= -github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= -github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= -github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= -github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 
github.com/pingcap/badger v1.5.1-0.20241015064302-38533b6cbf8d h1:eHcokyHxm7HVM+7+Qy1zZwC7NhX9wVNX8oQDcSZw1qI= @@ -736,7 +670,6 @@ github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJ github.com/qri-io/jsonpointer v0.1.1/go.mod h1:DnJPaYgiKu56EuDp8TU5wFLdZIcAnb/uH9v37ZaMV64= github.com/qri-io/jsonschema v0.2.1 h1:NNFoKms+kut6ABPf6xiKNM5214jzxAhDBrPHCJ97Wg0= github.com/qri-io/jsonschema v0.2.1/go.mod h1:g7DPkiOsK1xv6T/Ao5scXRkd+yTFygcANPBaaqW+VrI= -github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= @@ -747,14 +680,11 @@ github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= 
-github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= github.com/sasha-s/go-deadlock v0.3.5 h1:tNCOEEDG6tBqrNDOX35j/7hL5FcFViG6awUGROb2NsU= @@ -795,7 +725,6 @@ github.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 h1:8ZnTA2 github.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67/go.mod h1:tNZjgbYncKL5HxvDULAr/mWDmFz4B7H8yrXEDlnoIiw= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= -github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= @@ -880,12 +809,6 @@ github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f h1:9DDCDwOyEy github.com/wangjohn/quickselect v0.0.0-20161129230411-ed8402a42d5f/go.mod h1:8sdOQnirw1PrcnTJYkmW1iOHtUmblMmGdUOHyWYycLI= github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 h1:S2dVYn90KE98chqDkyE9Z4N61UnQd+KOfgp5Iu53llk= github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xitongsys/parquet-go v1.5.1/go.mod h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx0K/GyB0o2bww= -github.com/xitongsys/parquet-go v1.6.3-0.20240520233950-75e935fc3e17 h1:mr+7gGPUasLmH3/5Iv1zwQwiY0WgGO21Ym7Q4FVw+xs= -github.com/xitongsys/parquet-go v1.6.3-0.20240520233950-75e935fc3e17/go.mod h1:u9udtIEWeBkphB2isZ8V8xVIMWgcUobH+7FRMO/Ld6c= -github.com/xitongsys/parquet-go-source 
v0.0.0-20190524061010-2b72cbee77d5/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA= -github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 h1:a742S4V5A15F93smuVxA60LQWsrCnN8bKeWDBARU1/k= -github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= @@ -949,7 +872,6 @@ go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucg go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= go.starlark.net v0.0.0-20210223155950-e043a3d3c984/go.mod h1:t3mmBBPzAVvK0L0n1drDmrQsJ8FoIx4INCqVMTr/Zo0= @@ -984,7 +906,6 @@ go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -1000,37 +921,22 @@ golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp 
v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac h1:TSSpLIG4v+p0rPv1pNOQtl1I8knsO4S9trOxNMOLVP4= golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= -golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.0.0-20210607152325-775e3b0c77b9/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= -golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= -golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= -golang.org/x/image v0.0.0-20220302094943-723b81ca9867/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= golang.org/x/lint 
v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1051,7 +957,6 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= @@ -1101,7 +1006,6 @@ golang.org/x/net v0.0.0-20220517181318-183a9ca12b87/go.mod h1:CfG3xpIq0wQ8r1q4Su golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= @@ -1168,9 +1072,7 @@ golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1187,7 +1089,6 @@ golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220909162455-aba9fc2a8ff2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1216,7 +1117,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 
-golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= @@ -1236,7 +1136,6 @@ golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -1250,7 +1149,6 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190829051458-42f498d34c4d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools 
v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1281,14 +1179,11 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1-0.20210205202024-ef80cdb6ec6d/go.mod h1:9bzcO0MWcOuT0tm1iBGzDVPshzfwoVvREIui8C+MHqU= golang.org/x/tools v0.1.1-0.20210302220138-2ac05c832e1a/go.mod h1:9bzcO0MWcOuT0tm1iBGzDVPshzfwoVvREIui8C+MHqU= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= @@ -1303,21 +1198,12 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -golang.org/x/xerrors 
v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= -gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= -gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= -gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= -gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= -gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= -gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= -gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= -gonum.org/v1/plot v0.10.1/go.mod h1:VZW5OlhkL1mysU9vaqNHnsy86inf6Ot+jB3r+BczCEo= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -1396,8 +1282,6 @@ google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod 
h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= @@ -1414,9 +1298,7 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1428,11 +1310,6 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= -gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= -gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod 
h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= -gopkg.in/jcmturner/gokrb5.v7 v7.3.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= -gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= @@ -1464,7 +1341,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= honnef.co/go/tools v0.6.1 h1:R094WgE8K4JirYjBaOpz/AvTyUu/3wbmAoskKN/pxTI= honnef.co/go/tools v0.6.1/go.mod h1:3puzxxljPCe8RGJX7BIy1plGbxEOZni5mR2aXe3/uk4= k8s.io/api v0.29.11 h1:6FwDo33f1WX5Yu0RQTX9YAd3wth8Ik0B4SXQKsoQfbk= @@ -1477,54 +1353,7 @@ k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= -lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= -modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= -modernc.org/cc/v3 v3.36.2/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= -modernc.org/cc/v3 v3.37.0/go.mod h1:vtL+3mdHx/wcj3iEGz84rQa8vEqR6XM84v5Lcvfph20= 
-modernc.org/cc/v3 v3.40.0/go.mod h1:/bTg4dnWkSXowUO6ssQKnOV0yMVxDYNIsIrzqTFDGH0= -modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc= -modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw= -modernc.org/ccgo/v3 v3.0.0-20220904174949-82d86e1b6d56/go.mod h1:YSXjPL62P2AMSxBphRHPn7IkzhVHqkvOnRKAKh+W6ZI= -modernc.org/ccgo/v3 v3.16.4/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= -modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= -modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws= -modernc.org/ccgo/v3 v3.16.9/go.mod h1:zNMzC9A9xeNUepy6KuZBbugn3c0Mc9TeiJO4lgvkJDo= -modernc.org/ccgo/v3 v3.16.13-0.20221017192402-261537637ce8/go.mod h1:fUB3Vn0nVPReA+7IG7yZDfjv1TMWjhQP8gCxrFAtL5g= -modernc.org/ccgo/v3 v3.16.13/go.mod h1:2Quk+5YgpImhPjv2Qsob1DnZ/4som1lJTodubIcoUkY= -modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ= -modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= -modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= -modernc.org/libc v1.16.0/go.mod h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A= -modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU= -modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= -modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= -modernc.org/libc v1.17.0/go.mod h1:XsgLldpP4aWlPlsjqKRdHPqCxCjISdHfM/yeWC5GyW0= -modernc.org/libc v1.17.4/go.mod h1:WNg2ZH56rDEwdropAJeZPQkXmDwh+JCA1s/htl6r2fA= -modernc.org/libc v1.18.0/go.mod h1:vj6zehR5bfc98ipowQOM2nIDUZnVew/wNC/2tOGS+q0= -modernc.org/libc v1.20.3/go.mod h1:ZRfIaEkgrYgZDl6pa4W39HgN5G/yDW+NRmNKZBDFrk0= -modernc.org/libc v1.21.4/go.mod h1:przBsL5RDOZajTVslkugzLBj1evTue36jEomFQOoYuI= -modernc.org/libc v1.22.2/go.mod 
h1:uvQavJ1pZ0hIoC/jfqNoMLURIMhKzINIWypNM17puug= -modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= -modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= -modernc.org/memory v1.2.0/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= -modernc.org/memory v1.3.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= -modernc.org/memory v1.4.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= -modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= -modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= -modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= -modernc.org/sqlite v1.18.2/go.mod h1:kvrTLEWgxUcHa2GfHBQtanR1H9ht3hTJNtKpzH9k1u0= -modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= -modernc.org/strutil v1.1.3/go.mod h1:MEHNA7PdEnEwLvspRMtWTNnp2nnyvMfkimT1NKNAGbw= -modernc.org/tcl v1.13.2/go.mod h1:7CLiGIPo1M8Rv1Mitpv5akc2+8fxUd2y2UzC/MfMzy0= -modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= -modernc.org/token v1.0.1/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= -modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= -modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= diff --git a/lightning/pkg/importer/BUILD.bazel 
b/lightning/pkg/importer/BUILD.bazel index 8513a91065378..7a78e56d50ff3 100644 --- a/lightning/pkg/importer/BUILD.bazel +++ b/lightning/pkg/importer/BUILD.bazel @@ -180,8 +180,6 @@ go_test( "@com_github_tikv_pd_client//:client", "@com_github_tikv_pd_client//http", "@com_github_tikv_pd_client//pkg/caller", - "@com_github_xitongsys_parquet_go//writer", - "@com_github_xitongsys_parquet_go_source//buffer", "@io_etcd_go_etcd_client_v3//:client", "@io_etcd_go_etcd_tests_v3//integration", "@org_uber_go_mock//gomock", diff --git a/lightning/pkg/importer/get_pre_info_test.go b/lightning/pkg/importer/get_pre_info_test.go index aa7d8157ea87b..7b1af9422da8f 100644 --- a/lightning/pkg/importer/get_pre_info_test.go +++ b/lightning/pkg/importer/get_pre_info_test.go @@ -20,13 +20,13 @@ import ( "context" "database/sql" "fmt" - "slices" "strings" "testing" "github.com/DATA-DOG/go-sqlmock" mysql_sql_driver "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" + "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/lightning/pkg/importer/mock" ropts "github.com/pingcap/tidb/lightning/pkg/importer/opts" "github.com/pingcap/tidb/pkg/errno" @@ -36,8 +36,6 @@ import ( "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/types" "github.com/stretchr/testify/require" - pqt_buf_src "github.com/xitongsys/parquet-go-source/buffer" - pqtwriter "github.com/xitongsys/parquet-go/writer" ) type colDef struct { @@ -253,26 +251,24 @@ func TestGetPreInfoGetAllTableStructures(t *testing.T) { } } -func generateParquetData(t *testing.T) []byte { - type parquetStruct struct { - ID int64 `parquet:"name=id, type=INT64"` - Name string `parquet:"name=name, type=BYTE_ARRAY"` - } - pf, err := pqt_buf_src.NewBufferFile(make([]byte, 0)) +func readParquetData(t *testing.T) []byte { + s, err := storage.ParseBackend("./testdata", nil) require.NoError(t, err) - pw, err := pqtwriter.NewParquetWriter(pf, new(parquetStruct), 4) + + store, err := 
storage.NewWithDefaultOpt(context.Background(), s) require.NoError(t, err) - for i := range 10 { - require.NoError(t, pw.Write(parquetStruct{ - ID: int64(i + 1), - Name: fmt.Sprintf("name_%d", i+1), - })) - } - require.NoError(t, pw.WriteStop()) - require.NoError(t, pf.Close()) - bf, ok := pf.(pqt_buf_src.BufferFile) - require.True(t, ok) - return slices.Clone(bf.Bytes()) + defer store.Close() + + reader, err := store.Open(context.Background(), "test.parquet", nil) + require.NoError(t, err) + defer reader.Close() + + bs := make([]byte, 1024) + l, err := reader.Read(bs) + bs = bs[:l] + require.NoError(t, err) + + return bs } func TestGetPreInfoReadFirstRow(t *testing.T) { @@ -282,7 +278,6 @@ func TestGetPreInfoReadFirstRow(t *testing.T) { 111,"aaa" 222,"bbb" `) - pqtData := generateParquetData(t) const testSQLData01 string = `INSERT INTO db01.tbl01 (ival, sval) VALUES (333, 'ccc'); INSERT INTO db01.tbl01 (ival, sval) VALUES (444, 'ddd');` testDataInfos := []struct { @@ -349,7 +344,7 @@ INSERT INTO db01.tbl01 (ival, sval) VALUES (444, 'ddd');` }, { FileName: "/db01/tbl01/data.005.parquet", - Data: pqtData, + Data: readParquetData(t), FirstN: 3, ExpectFirstRowDatums: [][]types.Datum{ { diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index 3c61ace2ac825..e0a60d8d7dc14 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -780,7 +780,6 @@ ChunkLoop: break } - chunk.FileMeta.ParquetMeta.MemoryQuota = mydump.GetMemoryQuota(rc.cfg.App.RegionConcurrency) cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { setError(err) diff --git a/lightning/pkg/importer/testdata/test.parquet b/lightning/pkg/importer/testdata/test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5c46f14ded682b1d66880075ad10f518479754b1 GIT binary patch literal 572 zcmb7?!A`B- 
zB4kds)A0Rd;)s9+EV`s%qJf5;m&={JWO0oG1*`J3ZMHeB786Oc)}Puxyn1LFxL+lcDqCxBtP0YBh)K6vgx2;BG!C1(&;^AO~7Z3INWNurV9UFS;)pymR%*oYzFuedUmUmDEd8S&mfq!UEXdx)@9^uY;@d m`J^xprnAu`EO^aIwNfqj4m*DD;Ha-%nlc)6lm4hr_vQ 0 { + typeLen = pc.TypeLen + } + if fields[i], err = schema.NewPrimitiveNodeConverted( + pc.Name, + parquet.Repetitions.Optional, + pc.Type, pc.Converted, + typeLen, pc.Precision, pc.Scale, + -1, + ); err != nil { + return err + } + opts = append(opts, parquet.WithDictionaryFor(pc.Name, true)) + opts = append(opts, parquet.WithCompressionFor(pc.Name, compress.Codecs.Snappy)) + } + + node, _ := schema.NewGroupNode("schema", parquet.Repetitions.Required, fields, -1) + props := parquet.NewWriterProperties(opts...) + pw := file.NewParquetWriter(wrapper, node, file.WithWriterProps(props)) + //nolint: errcheck + defer pw.Close() + + // Only one row group for simplicity + rgw := pw.AppendRowGroup() + //nolint: errcheck + defer rgw.Close() + + for _, pc := range pcolumns { + cw, err := rgw.NextColumn() + if err != nil { + return err + } + vals, defLevel := pc.Gen(rows) + + switch w := cw.(type) { + case *file.Int64ColumnChunkWriter: + buf, _ := vals.([]int64) + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.Float64ColumnChunkWriter: + buf, _ := vals.([]float64) + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.ByteArrayColumnChunkWriter: + buf, _ := vals.([]parquet.ByteArray) + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.Int32ColumnChunkWriter: + buf, _ := vals.([]int32) + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.BooleanColumnChunkWriter: + buf, _ := vals.([]bool) + _, err = w.WriteBatch(buf, defLevel, nil) + default: + return fmt.Errorf("unsupported column type %T", cw) + } + + if err != nil { + return err + } + if err := cw.Close(); err != nil { + return err + } + } + + return nil +} diff --git a/tools/gen-parquet/BUILD.bazel b/tools/gen-parquet/BUILD.bazel index 08eba16858c31..8c8b05315f20a 100644 --- a/tools/gen-parquet/BUILD.bazel 
+++ b/tools/gen-parquet/BUILD.bazel @@ -6,8 +6,10 @@ go_library( importpath = "github.com/pingcap/tidb/tools/gen-parquet", visibility = ["//visibility:private"], deps = [ - "@com_github_xitongsys_parquet_go//writer", - "@com_github_xitongsys_parquet_go_source//local", + "@com_github_joechenrh_arrow_go_v18//parquet", + "@com_github_joechenrh_arrow_go_v18//parquet/compress", + "@com_github_joechenrh_arrow_go_v18//parquet/file", + "@com_github_joechenrh_arrow_go_v18//parquet/schema", ], ) diff --git a/tools/gen-parquet/main.go b/tools/gen-parquet/main.go index 5f0a98a6c35c8..54bf8bdfacf7d 100644 --- a/tools/gen-parquet/main.go +++ b/tools/gen-parquet/main.go @@ -17,61 +17,142 @@ package main import ( "flag" "fmt" + "io" "log" - "path/filepath" + "os" "strconv" - "github.com/xitongsys/parquet-go-source/local" - "github.com/xitongsys/parquet-go/writer" + "github.com/joechenrh/arrow-go/v18/parquet" + "github.com/joechenrh/arrow-go/v18/parquet/compress" + "github.com/joechenrh/arrow-go/v18/parquet/file" + "github.com/joechenrh/arrow-go/v18/parquet/schema" ) -var ( - schema = flag.String("schema", "test", "Test schema name") - table = flag.String("table", "parquet", "Test table name") - chunks = flag.Int("chunk", 10, "Chunk files count") - rowNumbers = flag.Int("rows", 1000, "Row number for each test file") - sourceDir = flag.String("dir", "", "test directory path") -) +type writeWrapper struct { + Writer *os.File +} -func genParquetFile(dir, name string, count int) error { - type Test struct { - I int32 `parquet:"name=iVal, type=INT32"` - S string `parquet:"name=s, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"` +func (*writeWrapper) Seek(_ int64, _ int) (int64, error) { + return 0, nil +} + +func (*writeWrapper) Read(_ []byte) (int, error) { + return 0, nil +} + +func (w *writeWrapper) Write(b []byte) (int, error) { + return w.Writer.Write(b) +} + +func (w *writeWrapper) Close() error { + return nil +} + +func getParquetWriter(w io.Writer, rowNames 
[]string, rowTypes []parquet.Type) *file.Writer { + fields := make([]schema.Node, len(rowNames)) + for i, name := range rowNames { + fields[i], _ = schema.NewPrimitiveNode( + name, + parquet.Repetitions.Optional, + rowTypes[i], + -1, 8, + ) } - w, err := local.NewLocalFileWriter(filepath.Join(dir, name)) - if err != nil { - return err + node, _ := schema.NewGroupNode("schema", parquet.Repetitions.Required, fields, -1) + schema := schema.NewSchema(node) + + opts := []parquet.WriterProperty{} + for i := range rowNames { + opts = append(opts, parquet.WithDictionaryFor(schema.Column(i).Name(), true)) + opts = append(opts, parquet.WithCompressionFor(schema.Column(i).Name(), compress.Codecs.Snappy)) } + props := parquet.NewWriterProperties(opts...) - test := &Test{} - dataWriter, err := writer.NewParquetWriter(w, test, 2) + return file.NewParquetWriter(w, schema.Root(), file.WithWriterProps(props)) +} + +func writeColumn(rgw file.SerialRowGroupWriter, rows int) error { + cw, err := rgw.NextColumn() if err != nil { return err } - for i := range count { - test.I = int32(i) - test.S = strconv.Itoa(i) - err := dataWriter.Write(test) - if err != nil { - return err + //nolint: errcheck + defer cw.Close() + + defLevel := make([]int16, rows) + for i := range rows { + defLevel[i] = 1 + } + + switch w := cw.(type) { + case *file.Int64ColumnChunkWriter: + buf := make([]int64, rows) + for i := range rows { + buf[i] = int64(i) + } + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.Float64ColumnChunkWriter: + buf := make([]float64, rows) + for i := range rows { + buf[i] = float64(i) } + _, err = w.WriteBatch(buf, defLevel, nil) + case *file.ByteArrayColumnChunkWriter: + buf := make([]parquet.ByteArray, rows) + for i := range rows { + s := strconv.Itoa(i) + buf[i] = []byte(s) + } + _, err = w.WriteBatch(buf, defLevel, nil) + default: + return fmt.Errorf("unsupported column type: %T", w) } - err = dataWriter.WriteStop() + + return err +} + +func writeSimpleParquetFile(filePath 
string, rows int) error { + file, err := os.Create(filePath) if err != nil { return err } - w.Close() + w := &writeWrapper{Writer: file} + + rowNames := []string{"iVal", "s"} + rowTypes := []parquet.Type{parquet.Types.Int64, parquet.Types.ByteArray} + pw := getParquetWriter(w, rowNames, rowTypes) + //nolint: errcheck + defer pw.Close() + // Only one row group for simplicity + rgw := pw.AppendRowGroup() + //nolint: errcheck + defer rgw.Close() + + for range rowNames { + if err := writeColumn(rgw, rows); err != nil { + return err + } + } return nil } +var ( + schemaName = flag.String("schema", "test", "Test schema name") + tableName = flag.String("table", "parquet", "Test table name") + chunks = flag.Int("chunk", 10, "Chunk files count") + rowNumbers = flag.Int("rows", 1000, "Row number for each test file") + sourceDir = flag.String("dir", "", "test directory path") +) + func main() { flag.Parse() for i := range *chunks { - name := fmt.Sprintf("%s.%s.%04d.parquet", *schema, *table, i) - err := genParquetFile(*sourceDir, name, *rowNumbers) + name := fmt.Sprintf("%s.%s.%04d.parquet", *schemaName, *tableName, i) + filePath := fmt.Sprintf("%s/%s", *sourceDir, name) + err := writeSimpleParquetFile(filePath, *rowNumbers) if err != nil { log.Fatalf("generate test source failed, name: %s, err: %+v", name, err) } From 95c1ebf6987c1e9a12dbdf4452ca1b9079bfa199 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 27 May 2025 13:49:56 +0800 Subject: [PATCH 80/93] fix CI --- go.mod | 9 ++++----- go.sum | 12 ++++++------ pkg/executor/importer/import.go | 7 +++++++ pkg/executor/importer/import_test.go | 2 ++ pkg/lightning/mydump/BUILD.bazel | 2 -- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 1fa673b68d2ef..2716be4d1a9ce 100644 --- a/go.mod +++ b/go.mod @@ -68,6 +68,7 @@ require ( github.com/jellydator/ttlcache/v3 v3.0.1 github.com/jfcg/sorty/v2 v2.1.0 github.com/jingyugao/rowserrcheck v1.1.1 + github.com/joechenrh/arrow-go/v18 
v18.0.0-20250305032250-07d568e83cc0 github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df github.com/karamaru-alpha/copyloopvar v1.2.1 @@ -151,8 +152,6 @@ require ( sourcegraph.com/sourcegraph/appdash-data v0.0.0-20151005221446-73f23eafcf67 ) -require github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 - require ( filippo.io/edwards25519 v1.1.0 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect @@ -181,7 +180,7 @@ require ( require ( cloud.google.com/go v0.112.1 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/compute/metadata v0.3.0 // indirect cloud.google.com/go/iam v1.1.6 // indirect cloud.google.com/go/pubsub v1.36.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect @@ -218,8 +217,8 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/goccy/go-json v0.10.4 // indirect - github.com/golang-jwt/jwt/v4 v4.5.1 // indirect - github.com/golang-jwt/jwt/v5 v5.2.1 // indirect + github.com/golang-jwt/jwt/v4 v4.5.2 // indirect + github.com/golang-jwt/jwt/v5 v5.2.2 // indirect github.com/golang/glog v1.2.4 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/gofuzz v1.2.0 // indirect diff --git a/go.sum b/go.sum index 34f1f4b75c5fe..93f2114318b59 100644 --- a/go.sum +++ b/go.sum @@ -21,8 +21,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= 
-cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/iam v1.1.6 h1:bEa06k05IO4f4uJonbB5iAgKTPpABy1ayxaIZV/GHVc= @@ -279,10 +279,10 @@ github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zV github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= -github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo= -github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= -github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= -github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI= +github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= +github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.2.4 h1:CNNw5U8lSiiBk7druxtSHHTsRWcxKoac6kZKm2peBBc= github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= diff --git a/pkg/executor/importer/import.go 
b/pkg/executor/importer/import.go index a343998704a2d..20265a6bba027 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -29,6 +29,7 @@ import ( "unicode/utf8" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" tidb "github.com/pingcap/tidb/pkg/config" @@ -1211,6 +1212,10 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { } } + failpoint.Inject("skipReadFiles", func() { + failpoint.Goto("afterReadFiles") + }) + // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { _, memoryUsageStream, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) @@ -1232,6 +1237,8 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { e.Plan.EncodeThreadCnt = encodeThreadCnt } + failpoint.Label("afterReadFiles") + e.dataFiles = dataFiles e.TotalFileSize = totalSize return nil diff --git a/pkg/executor/importer/import_test.go b/pkg/executor/importer/import_test.go index d338d90ccfa2e..52b6af4b9d709 100644 --- a/pkg/executor/importer/import_test.go +++ b/pkg/executor/importer/import_test.go @@ -35,6 +35,7 @@ import ( plannercore "github.com/pingcap/tidb/pkg/planner/core" plannerutil "github.com/pingcap/tidb/pkg/planner/util" "github.com/pingcap/tidb/pkg/sessionctx/vardef" + "github.com/pingcap/tidb/pkg/testkit/testfailpoint" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" "github.com/pingcap/tidb/pkg/util/logutil" @@ -422,6 +423,7 @@ func TestSupportedSuffixForServerDisk(t *testing.T) { fileNames: []string{"file3.PARQUET", "file3.parquet.gz", "file3.PARQUET.GZIP", "file3.parquet.zstd", "file3.parquet.zst", "file3.parquet.snappy", "file3.parquet.snappy"}, }, } + testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/executor/importer/skipReadFiles", "return()") for _, testcase := range testcases { for _, fileName := range 
testcase.fileNames { c.Format = DataFormatAuto diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 4b282e644d717..fd2a6dd1f44e8 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -107,8 +107,6 @@ go_test( "@com_github_data_dog_go_sqlmock//:go-sqlmock", "@com_github_go_sql_driver_mysql//:mysql", "@com_github_joechenrh_arrow_go_v18//parquet", - "@com_github_joechenrh_arrow_go_v18//parquet/compress", - "@com_github_joechenrh_arrow_go_v18//parquet/file", "@com_github_joechenrh_arrow_go_v18//parquet/schema", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", From 113817d26d5bccdf05a2366e91c0458b9b2d66ef Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 27 May 2025 14:01:49 +0800 Subject: [PATCH 81/93] update DEPS.bzl --- DEPS.bzl | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 489d5ad595347..5c7b02b7a2e89 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -2842,26 +2842,26 @@ def go_deps(): name = "com_github_golang_jwt_jwt_v4", build_file_proto_mode = "disable_global", importpath = "github.com/golang-jwt/jwt/v4", - sha256 = "a05e4849f6b52d84154e9bc37fca7f340bb85d9cce2ce180a09ae70758f6890c", - strip_prefix = "github.com/golang-jwt/jwt/v4@v4.5.1", + sha256 = "ec5ee69a31fd478fc197fddce7c06dad1abe7543095a55c4ee6546ae79d99a0f", + strip_prefix = "github.com/golang-jwt/jwt/v4@v4.5.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", - "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.1.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", + "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v4/com_github_golang_jwt_jwt_v4-v4.5.2.zip", ], ) go_repository( name = "com_github_golang_jwt_jwt_v5", build_file_proto_mode = "disable_global", importpath = "github.com/golang-jwt/jwt/v5", - sha256 = "ad5cdc5c6bac562a2b890e96347208ffdb30a940243b558465ab7de90913a180", - strip_prefix = "github.com/golang-jwt/jwt/v5@v5.2.1", + sha256 = "278980d9e52498b7c54baf21fed203b942aa1d08b7f62eec494110b61b6fd3c9", + strip_prefix = "github.com/golang-jwt/jwt/v5@v5.2.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", - "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", + "http://ats.apps.svc/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", + "https://cache.hawkingrei.com/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/golang-jwt/jwt/v5/com_github_golang_jwt_jwt_v5-v5.2.2.zip", ], ) go_repository( @@ -7962,13 +7962,13 @@ def go_deps(): name = "com_google_cloud_go_compute_metadata", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/compute/metadata", - sha256 = 
"5325feb8adc47daf4e4e74e21922c3e12b8f6201571b2aa3f7b413771190c2a3", - strip_prefix = "cloud.google.com/go/compute/metadata@v0.5.0", + sha256 = "c0ab79c30870c1aa9912fb0fdcb043e0044782825988e40f59401d227976b677", + strip_prefix = "cloud.google.com/go/compute/metadata@v0.3.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.5.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/metadata/com_google_cloud_go_compute_metadata-v0.3.0.zip", ], ) go_repository( From 6ec630abf411f4eeafdf2f9d1b653ca303f49ed0 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 27 May 2025 15:06:23 +0800 Subject: [PATCH 82/93] fix test --- pkg/lightning/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index ac84647d7d99c..689a16b4a9875 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -1464,7 +1464,7 @@ func NewConfig() *Config { RegionConcurrency: runtime.NumCPU(), TableConcurrency: 0, IndexConcurrency: 0, - MaxMemoryUsage: 0, + MaxMemoryUsage: 40, IOConcurrency: 5, CheckRequirements: true, 
TaskInfoSchemaName: defaultTaskInfoSchemaName, From e8e313e4611da27403700ee38b83cd35148445d3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 29 Jul 2025 17:11:10 +0800 Subject: [PATCH 83/93] fix after merge Signed-off-by: Ruihao Chen --- DEPS.bzl | 1346 ++++++++++++------------ go.mod | 2 +- go.sum | 21 - pkg/lightning/mydump/parquet_parser.go | 9 +- 4 files changed, 666 insertions(+), 712 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index 5f36339663094..e15bb868ccdb0 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -758,19 +758,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/bombsimon/wsl/v5/com_github_bombsimon_wsl_v5-v5.1.0.zip", ], ) - go_repository( - name = "com_github_boombuler_barcode", - build_file_proto_mode = "disable_global", - importpath = "github.com/boombuler/barcode", - sha256 = "812c5beeaa87864227f9d92a9ae71792dc0e6302a33737a91aabe1e511cde42b", - strip_prefix = "github.com/boombuler/barcode@v1.0.1", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "http://ats.apps.svc/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/boombuler/barcode/com_github_boombuler_barcode-v1.0.1.zip", - ], - ) go_repository( name = "com_github_breml_bidichk", build_file_proto_mode = "disable_global", @@ -7689,325 +7676,325 @@ def go_deps(): name = "com_google_cloud_go_accessapproval", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/accessapproval", - sha256 = "a7c5416a866079d36da0d874a7fe56887bf8ce8cce3174fa4a793f08965c0eea", - strip_prefix = "cloud.google.com/go/accessapproval@v1.7.6", + sha256 = "48066ab6a359de0c060f5f427ae5c7ee0d10080b197d18dc1f2bd7108d16f9f3", + strip_prefix = "cloud.google.com/go/accessapproval@v1.7.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/accessapproval/com_google_cloud_go_accessapproval-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_accesscontextmanager", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/accesscontextmanager", - sha256 = "e70510f80a160d30dc7c8a0fde207dcf5cae513e38dc566903323ef42595e5ba", - strip_prefix = "cloud.google.com/go/accesscontextmanager@v1.8.6", + sha256 = "cc7ff5deab5067c41d6f3f68043868f088be52d1ce8582da7601f543ba393be5", + strip_prefix = "cloud.google.com/go/accesscontextmanager@v1.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/accesscontextmanager/com_google_cloud_go_accesscontextmanager-v1.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_aiplatform", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/aiplatform", - sha256 = "4ae41774aeffd2202ed00cdcdf60487cfa114e384ecfbf1cb3862d4322ad4ac3", - strip_prefix = "cloud.google.com/go/aiplatform@v1.66.0", + sha256 = "ad7c373d618de9c619486880fc1803cac8ab0f90238fc6a6aee5c3a870efaff5", + strip_prefix = "cloud.google.com/go/aiplatform@v1.60.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.66.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.66.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.66.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.66.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.60.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.60.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.60.0.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/aiplatform/com_google_cloud_go_aiplatform-v1.60.0.zip", ], ) go_repository( name = "com_google_cloud_go_analytics", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/analytics", - sha256 = "25f6ff05e654184ff8b3998a940e52ddfe558ccc6768a06f2b5dd38cff19f7ac", - strip_prefix = "cloud.google.com/go/analytics@v0.23.1", + sha256 = "9af1681ba6c9090c51b227f5f26137f6a139258587cc569b367e424f4974e556", + strip_prefix = "cloud.google.com/go/analytics@v0.23.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/analytics/com_google_cloud_go_analytics-v0.23.0.zip", ], ) go_repository( name = "com_google_cloud_go_apigateway", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/apigateway", - sha256 = "88cd110ac5d02e8f8dc27655decbd98b3a739506a2d52978b7d2ec8572c98027", - strip_prefix = "cloud.google.com/go/apigateway@v1.6.6", + sha256 = "66cb6ae25ac2d5e983c2281f9b68ae72baef1697e55eace91360606c7cebd22f", + strip_prefix = "cloud.google.com/go/apigateway@v1.6.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigateway/com_google_cloud_go_apigateway-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_apigeeconnect", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/apigeeconnect", - sha256 = "ccc3dfcc5d27545538950de439bd6d21fc3402c84397f3cf30042c86116c5491", - strip_prefix = "cloud.google.com/go/apigeeconnect@v1.6.6", + sha256 = "2047e90bdc5a103ceab7747f6afc8ba7cd3e62333a408d8d97dd44ca30f7b125", + strip_prefix = "cloud.google.com/go/apigeeconnect@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.5.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigeeconnect/com_google_cloud_go_apigeeconnect-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_apigeeregistry", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/apigeeregistry", - sha256 = "e776dbe59cc5095781c5cdc350d2759c56a35bfd73e150986c4e95695b92f005", - strip_prefix = "cloud.google.com/go/apigeeregistry@v0.8.4", + sha256 = "b30180fda8417c97a5ecd039552c5a45222be85936227267831bbac135870505", + strip_prefix = "cloud.google.com/go/apigeeregistry@v0.8.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/apigeeregistry/com_google_cloud_go_apigeeregistry-v0.8.3.zip", ], ) go_repository( name = "com_google_cloud_go_appengine", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/appengine", - sha256 = 
"c03275f83ee6e05413dadd5999dd88ef07c212155caae469e9a5ca0733a7a6c5", - strip_prefix = "cloud.google.com/go/appengine@v1.8.6", + sha256 = "55f6ffdadd031dd49c8e07bbc2df97f17025c5b273bc03160c75fff7542c8cec", + strip_prefix = "cloud.google.com/go/appengine@v1.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/appengine/com_google_cloud_go_appengine-v1.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_area120", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/area120", - sha256 = "737542aefc2517293817f0013577fa88c2cd65a3a8517a6ad57d182f997ad10c", - strip_prefix = "cloud.google.com/go/area120@v0.8.6", + sha256 = "215a423244d6e4079ceb47935ef4435e710e15c1d354aef2b7adc91dd2379091", + strip_prefix = "cloud.google.com/go/area120@v0.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/area120/com_google_cloud_go_area120-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_artifactregistry", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/artifactregistry", - sha256 = "995b7bd1f5c12daa2d732f8ceaa8f7fc514e91a4f83370a00d91eee131b5de0d", - strip_prefix = "cloud.google.com/go/artifactregistry@v1.14.8", + sha256 = "811813420ecafb28fd83630ec085c5c8c18048978d357dabfdf56699c34c1b69", + strip_prefix = "cloud.google.com/go/artifactregistry@v1.14.7", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.8.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.8.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.8.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.8.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.7.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.7.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.7.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/artifactregistry/com_google_cloud_go_artifactregistry-v1.14.7.zip", ], ) go_repository( name = "com_google_cloud_go_asset", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/asset", - sha256 = "560d980dfc50e56ab5573e53e26dca22104864a2d26f555f2b475045d7e97548", - strip_prefix = "cloud.google.com/go/asset@v1.18.1", + sha256 = "16a77c7774c87fe0a0f87b772411a1980c077db3f71692de6faa208d9ce45d52", + strip_prefix = "cloud.google.com/go/asset@v1.17.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.18.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.18.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.18.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.18.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.17.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.17.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.17.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/asset/com_google_cloud_go_asset-v1.17.2.zip", ], ) go_repository( name = "com_google_cloud_go_assuredworkloads", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/assuredworkloads", - sha256 = "1f39a845f67df1406b80cf4e94ebc4ac0d3d40e2d82cf8f8357b2934c49b3973", - strip_prefix = "cloud.google.com/go/assuredworkloads@v1.11.6", + sha256 = "232f945a5f780c968089e5c9a03c6081e8c0256aa8d93d4cf1ea1b5e22a0f178", + strip_prefix = "cloud.google.com/go/assuredworkloads@v1.11.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/assuredworkloads/com_google_cloud_go_assuredworkloads-v1.11.5.zip", ], ) go_repository( name = "com_google_cloud_go_automl", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/automl", - sha256 = "7d1c2d2ae85197feedb0cd90383deeb1cd8cf5595e7e1bdce43959509019c184", - strip_prefix = "cloud.google.com/go/automl@v1.13.6", + sha256 = "fd3fd5c3c639bb85331411260f3aca150bac0daea62c37ca1a8f85933a1984d1", + strip_prefix = "cloud.google.com/go/automl@v1.13.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/automl/com_google_cloud_go_automl-v1.13.5.zip", ], ) go_repository( name = "com_google_cloud_go_baremetalsolution", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/baremetalsolution", - sha256 = "cbdf6a0e464b07715b46efcf9f5cda743b84c54a72b184e88431a002dca7bda6", - strip_prefix = "cloud.google.com/go/baremetalsolution@v1.2.5", + sha256 = "cb51f2f4a79130b7ee2144526da55951318df4ed271f1559956e910488c49fbe", + strip_prefix = "cloud.google.com/go/baremetalsolution@v1.2.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/baremetalsolution/com_google_cloud_go_baremetalsolution-v1.2.4.zip", ], ) go_repository( name = "com_google_cloud_go_batch", 
build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/batch", - sha256 = "9a42e598e819035d6f224ef49e10746e321b46483e65dc7e6854f04a7732db39", - strip_prefix = "cloud.google.com/go/batch@v1.8.3", + sha256 = "009c51e5067877c2cb63c16ae70bdced460b73d723f6e318e629632771ab6917", + strip_prefix = "cloud.google.com/go/batch@v1.8.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.3.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.3.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.3.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.3.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/batch/com_google_cloud_go_batch-v1.8.0.zip", ], ) go_repository( name = "com_google_cloud_go_beyondcorp", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/beyondcorp", - sha256 = "b83b9923fc27f9cfdb6a45763de0b61a3f8d45cacfbd5a3e97d3fa4d8971a77b", - strip_prefix = "cloud.google.com/go/beyondcorp@v1.0.5", + sha256 = "c65fae2e6401e2d847a9590df932e86e6226f504357203357e33bb31634f9a16", + strip_prefix = "cloud.google.com/go/beyondcorp@v1.0.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.5.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/beyondcorp/com_google_cloud_go_beyondcorp-v1.0.4.zip", ], ) go_repository( name = "com_google_cloud_go_bigquery", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/bigquery", - sha256 = "5076c760a2b1511f427430d82b5c1348d2d8a13a52558216e2f98e4022458eb6", - strip_prefix = "cloud.google.com/go/bigquery@v1.60.0", + sha256 = "50bb376bc1ced07fc35ed7a3e6ebe043b0fec289e17dfabbfe32ef0b5113ca54", + strip_prefix = "cloud.google.com/go/bigquery@v1.59.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.60.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.60.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.60.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.60.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.59.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.59.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.59.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/bigquery/com_google_cloud_go_bigquery-v1.59.1.zip", ], ) go_repository( name = "com_google_cloud_go_billing", 
build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/billing", - sha256 = "980e3188c97135353bc5c721f45a77ed5c8c37418bfa8529b715c0b75512b70c", - strip_prefix = "cloud.google.com/go/billing@v1.18.4", + sha256 = "ff169192f71f00fd632a525600b11550badf5965badc9ea3e537facec86cdbf1", + strip_prefix = "cloud.google.com/go/billing@v1.18.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/billing/com_google_cloud_go_billing-v1.18.2.zip", ], ) go_repository( name = "com_google_cloud_go_binaryauthorization", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/binaryauthorization", - sha256 = "ab6f09d297809da5305723b9602408db406b9473da9f7a658c5d8d1a9b5affac", - strip_prefix = "cloud.google.com/go/binaryauthorization@v1.8.2", + sha256 = "dee98d01d410ad8b4923c657955f96921aeea6166172b7893eb3d1f09c6aaa0a", + strip_prefix = "cloud.google.com/go/binaryauthorization@v1.8.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.2.zip", - 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/binaryauthorization/com_google_cloud_go_binaryauthorization-v1.8.1.zip", ], ) go_repository( name = "com_google_cloud_go_certificatemanager", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/certificatemanager", - sha256 = "dc8ff8b5eee7cb261023e109dff4177d8aae4a1e6b6c9346b66cc008db26f976", - strip_prefix = "cloud.google.com/go/certificatemanager@v1.8.0", + sha256 = "780e8e315a9f7225546b6673356c2229f72219410346b95207ac049511b98841", + strip_prefix = "cloud.google.com/go/certificatemanager@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.8.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.8.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.8.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.8.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.7.5.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/certificatemanager/com_google_cloud_go_certificatemanager-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_channel", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/channel", - sha256 = "00b5b8c159da49d6e33981dd6577464a317ca708d2df7ff5265c95de41d94c88", - strip_prefix = "cloud.google.com/go/channel@v1.17.6", + sha256 = "73db84def08affd03be8b491c903a020eac8b37cb12fb8dcad4eeeaa4993e25d", + strip_prefix = "cloud.google.com/go/channel@v1.17.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/channel/com_google_cloud_go_channel-v1.17.5.zip", ], ) go_repository( name = "com_google_cloud_go_cloudbuild", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/cloudbuild", - sha256 = "5c53087fc6dd304c34043ccfb0ab958d89fedd6c27b844148884fa8b17a6ab8d", - strip_prefix = "cloud.google.com/go/cloudbuild@v1.16.0", + 
sha256 = "f6ee875558b6af58a958f7e186258352268943552f6aa14b550375ec91a151bd", + strip_prefix = "cloud.google.com/go/cloudbuild@v1.15.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.16.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.16.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.16.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.16.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.15.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.15.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.15.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/cloudbuild/com_google_cloud_go_cloudbuild-v1.15.1.zip", ], ) go_repository( name = "com_google_cloud_go_clouddms", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/clouddms", - sha256 = "59c0d659e1094b0d9137becd3335b8ddd9c5a1e684751a0d9013a2071878c0d1", - strip_prefix = "cloud.google.com/go/clouddms@v1.7.5", + sha256 = "75062644daa91a5d0a8988a779a7b78af897d50d1136d158744a6218ae4be50d", + strip_prefix = "cloud.google.com/go/clouddms@v1.7.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.5.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/clouddms/com_google_cloud_go_clouddms-v1.7.4.zip", ], ) go_repository( name = "com_google_cloud_go_cloudtasks", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/cloudtasks", - sha256 = "96627633de03d5ca30e4788b98097c51f93e5b4139ec0b9605d508ea7d4ab885", - strip_prefix = "cloud.google.com/go/cloudtasks@v1.12.7", + sha256 = "2bc9e56b75f82d47c912fdab8a4bdf498e90af3e798c1e36d41c129edfec19c2", + strip_prefix = "cloud.google.com/go/cloudtasks@v1.12.6", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.7.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.7.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.7.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.7.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.6.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.6.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/cloudtasks/com_google_cloud_go_cloudtasks-v1.12.6.zip", ], ) go_repository( name = "com_google_cloud_go_compute", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/compute", - sha256 = 
"5173a017a15f7874e68752a8116556fe0d7e5e11344dd4265c454467bb651cb8", - strip_prefix = "cloud.google.com/go/compute@v1.25.1", + sha256 = "0cf3d4325e378c92ff90cef3d1b7752682a77f0eaa0b11c092cc3ea32e5ed638", + strip_prefix = "cloud.google.com/go/compute@v1.24.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.25.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/compute/com_google_cloud_go_compute-v1.24.0.zip", ], ) go_repository( @@ -8027,143 +8014,143 @@ def go_deps(): name = "com_google_cloud_go_contactcenterinsights", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/contactcenterinsights", - sha256 = "08bb41cddc825d506c308de2e626b9064f93717ba5d46f0e96704276e6137bd2", - strip_prefix = "cloud.google.com/go/contactcenterinsights@v1.13.1", + sha256 = "69a6ecff3f5d040e62b61555d71406bb8c87dbe07addc05d30a0e8bb935d55b0", + strip_prefix = "cloud.google.com/go/contactcenterinsights@v1.13.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.1.zip", - 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/contactcenterinsights/com_google_cloud_go_contactcenterinsights-v1.13.0.zip", ], ) go_repository( name = "com_google_cloud_go_container", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/container", - sha256 = "9dcc75ba98b0933b060f7b9cda65be293f9ab56f70c6c9e3d863c1ff5133ad36", - strip_prefix = "cloud.google.com/go/container@v1.35.0", + sha256 = "840b125be4780c31ba03ea6abcfc55729eb4927f592d21ae0d314d981bc67057", + strip_prefix = "cloud.google.com/go/container@v1.31.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.35.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.35.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.35.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.35.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.31.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.31.0.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.31.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/container/com_google_cloud_go_container-v1.31.0.zip", ], ) go_repository( name = "com_google_cloud_go_containeranalysis", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/containeranalysis", - sha256 = "033998ae5653d104c29595f5c282c4bec82ed0ca1293ad063687f57be396ae33", - strip_prefix = "cloud.google.com/go/containeranalysis@v0.11.5", + sha256 = "9aa4f7e5cbfa7317beed95fc032d5f9039c4c2881e49a942a4abcfc847150c7a", + strip_prefix = "cloud.google.com/go/containeranalysis@v0.11.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/containeranalysis/com_google_cloud_go_containeranalysis-v0.11.4.zip", ], ) go_repository( name = "com_google_cloud_go_datacatalog", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/datacatalog", - sha256 = "445f8af59920f2119e1ef8a6064b2363c3498eabefb460c53c2b5a6826a008c7", - strip_prefix 
= "cloud.google.com/go/datacatalog@v1.20.0", + sha256 = "158ea05506494f5a3b82d23c235c15542bfe64837310ce2b8cb4b7fe42536b40", + strip_prefix = "cloud.google.com/go/datacatalog@v1.19.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.20.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.20.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.20.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.20.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.19.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.19.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.19.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datacatalog/com_google_cloud_go_datacatalog-v1.19.3.zip", ], ) go_repository( name = "com_google_cloud_go_dataflow", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dataflow", - sha256 = "9959e754b4d6164a87018a6ec1c247864236affcee8715cb645ca0639042b0d3", - strip_prefix = "cloud.google.com/go/dataflow@v0.9.6", + sha256 = "53a924bc78f46210856c26bd93e9170312391107c511669377104340d0636c3b", + strip_prefix = "cloud.google.com/go/dataflow@v0.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataflow/com_google_cloud_go_dataflow-v0.9.5.zip", ], ) go_repository( name = "com_google_cloud_go_dataform", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dataform", - sha256 = "c60752c51194d4a0cfd26b7783e6813a1586041609d8dafee6a4f73b123fee8d", - strip_prefix = "cloud.google.com/go/dataform@v0.9.3", + sha256 = "d70f87bac2c275cb315b4ce7e4cc202cb9ab0e66ac1055ea4ab16bb829b6e528", + strip_prefix = "cloud.google.com/go/dataform@v0.9.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.3.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.3.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.3.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.3.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataform/com_google_cloud_go_dataform-v0.9.2.zip", ], ) go_repository( name = "com_google_cloud_go_datafusion", build_file_proto_mode = 
"disable_global", importpath = "cloud.google.com/go/datafusion", - sha256 = "9ee79e05a3234462f64d009d490fdefb455e3a0802f118726a90ecc92315adf2", - strip_prefix = "cloud.google.com/go/datafusion@v1.7.6", + sha256 = "e8a2869286204a3592a5ca17b9e08f0bd0c8cddc89d9a2145424492cf6117cd1", + strip_prefix = "cloud.google.com/go/datafusion@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datafusion/com_google_cloud_go_datafusion-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_datalabeling", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/datalabeling", - sha256 = "bd62c81f4b68de677018317116c90b1e66a1989d4ac02d6fe2e5df2f687debdc", - strip_prefix = "cloud.google.com/go/datalabeling@v0.8.6", + sha256 = "9d622cbd38c7c7fda283655efeb49e94afe893d26719c9083081f4541dc8fc07", + strip_prefix = "cloud.google.com/go/datalabeling@v0.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.6.zip", - 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datalabeling/com_google_cloud_go_datalabeling-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_dataplex", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dataplex", - sha256 = "8996d48fc33e1e371a9b9f58dfafbf046d8b89c534a8431400247dd990596d9b", - strip_prefix = "cloud.google.com/go/dataplex@v1.15.0", + sha256 = "a1d23438c094389cc0b18c5a342459b699ff74b2c0b8ec81c83d0d3b019283d4", + strip_prefix = "cloud.google.com/go/dataplex@v1.14.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.15.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.15.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.15.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.15.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.14.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.14.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.14.2.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataplex/com_google_cloud_go_dataplex-v1.14.2.zip", ], ) go_repository( name = "com_google_cloud_go_dataproc_v2", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dataproc/v2", - sha256 = "8e90936ef4ffbb935a319d5b92fcaf9ef2d7c8fb15e53a473a6bbd24aaf04038", - strip_prefix = "cloud.google.com/go/dataproc/v2@v2.4.1", + sha256 = "a06ef35391acd2074b1454c6c90b1db967872679426d353add376a23650abff4", + strip_prefix = "cloud.google.com/go/dataproc/v2@v2.4.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataproc/v2/com_google_cloud_go_dataproc_v2-v2.4.0.zip", ], ) go_repository( name = "com_google_cloud_go_dataqna", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dataqna", - sha256 = "07e7d985f317ff2dc73960cc0160e9c08594eceb803a0d50187b2164e8ae391b", - strip_prefix = "cloud.google.com/go/dataqna@v0.8.6", + sha256 = "8dd6dfc408512a77bcd0e2a421128a0ff60563479dd149c273e129bf4a659513", + strip_prefix = "cloud.google.com/go/dataqna@v0.8.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dataqna/com_google_cloud_go_dataqna-v0.8.5.zip", ], ) go_repository( @@ -8183,91 +8170,91 @@ def go_deps(): name = "com_google_cloud_go_datastream", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/datastream", - sha256 = "983f8e50bdd7b2c9a80cbb45f91c42c0c220dcfa221587f94cbdf15a8c40089b", - strip_prefix = "cloud.google.com/go/datastream@v1.10.5", + sha256 = "d4e33da4b94b839b561119fab0927ed96848a0f2ab007d28d05b492fbf5ee89b", + strip_prefix = "cloud.google.com/go/datastream@v1.10.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.4.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/datastream/com_google_cloud_go_datastream-v1.10.4.zip", ], ) go_repository( name = "com_google_cloud_go_deploy", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/deploy", - sha256 = "9c4b4c0e47524721843458e066459d843248f244b8c2154ca936ff127abd7a56", - strip_prefix = "cloud.google.com/go/deploy@v1.17.2", + sha256 = "255d773b063c5a25553fbf5b15dd80b82629a720c939e98253d3dcb47ba39bba", + strip_prefix = "cloud.google.com/go/deploy@v1.17.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/deploy/com_google_cloud_go_deploy-v1.17.1.zip", ], ) go_repository( name = "com_google_cloud_go_dialogflow", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dialogflow", - sha256 = "80c5619aca0ba08e0483b8b20222697d45040da5eea5ef6c9290e62e45d9e549", - strip_prefix = "cloud.google.com/go/dialogflow@v1.52.0", + sha256 = 
"0f5a512760a40552a701d6da6d4b5adf2ecdbc9e520c2943478290819ab377bd", + strip_prefix = "cloud.google.com/go/dialogflow@v1.49.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.52.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.52.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.52.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.52.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.49.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.49.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.49.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dialogflow/com_google_cloud_go_dialogflow-v1.49.0.zip", ], ) go_repository( name = "com_google_cloud_go_dlp", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/dlp", - sha256 = "d4c86ca0d382759f5c802d5987337c2631bafbafa85995f339e55dd774ae4f16", - strip_prefix = "cloud.google.com/go/dlp@v1.12.1", + sha256 = "ce1b28549395dae09e1b35dc6111e05bff9d377914d1898a2f2da29fc819f2be", + strip_prefix = "cloud.google.com/go/dlp@v1.11.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.12.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.12.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.12.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.12.1.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.11.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.11.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.11.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/dlp/com_google_cloud_go_dlp-v1.11.2.zip", ], ) go_repository( name = "com_google_cloud_go_documentai", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/documentai", - sha256 = "71f745236d7bd365771a44be585ed31cfa79ea2dc0b5cc89ba7cdafed43d5132", - strip_prefix = "cloud.google.com/go/documentai@v1.26.1", + sha256 = "a71869a7be5bed35de419b4648d78679c80d7b460907e87b3858490984e84f5e", + strip_prefix = "cloud.google.com/go/documentai@v1.25.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.26.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.26.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.26.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.26.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.25.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.25.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.25.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/documentai/com_google_cloud_go_documentai-v1.25.0.zip", ], ) go_repository( name = "com_google_cloud_go_domains", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/domains", - sha256 = 
"772c772d561a92b66fb978d71299b3ef07f5aa1ce362df12edfa45745ebaa355", - strip_prefix = "cloud.google.com/go/domains@v0.9.6", + sha256 = "52be9698870dabb6b10fd8fad795b46476de29feb174d0bfc6f10c9fc6707f13", + strip_prefix = "cloud.google.com/go/domains@v0.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/domains/com_google_cloud_go_domains-v0.9.5.zip", ], ) go_repository( name = "com_google_cloud_go_edgecontainer", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/edgecontainer", - sha256 = "6dd475039505a211b05d54e33059b9671010c0249e6a3eef68749e0dfad0f3d5", - strip_prefix = "cloud.google.com/go/edgecontainer@v1.2.0", + sha256 = "c5065dee8ac4386ae642e4f7ae7b182b4a06f99092d4d89c60487e1f947fdd03", + strip_prefix = "cloud.google.com/go/edgecontainer@v1.1.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.2.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.2.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.2.0.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.2.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.1.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.1.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.1.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/edgecontainer/com_google_cloud_go_edgecontainer-v1.1.5.zip", ], ) go_repository( @@ -8287,130 +8274,130 @@ def go_deps(): name = "com_google_cloud_go_essentialcontacts", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/essentialcontacts", - sha256 = "74db79783193980da3c7a7c7efb2bb0604e1d3bcb4b5f340c9b9e2c7b107ba0c", - strip_prefix = "cloud.google.com/go/essentialcontacts@v1.6.7", + sha256 = "bc60afb97314e44c3f7e65cb2e0342e5c11ef3748839105ca8a551fafb9afcfd", + strip_prefix = "cloud.google.com/go/essentialcontacts@v1.6.6", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.7.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.7.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.7.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.7.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.6.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.6.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/essentialcontacts/com_google_cloud_go_essentialcontacts-v1.6.6.zip", ], ) go_repository( name = "com_google_cloud_go_eventarc", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/eventarc", - sha256 = "d7f54c4d23842104f0d75f8ca65cf7b1beabfa9e1318670ed0643fedcf3af19f", - strip_prefix = "cloud.google.com/go/eventarc@v1.13.5", + sha256 = "58cfd142c358fcef531f6290749d49bfeb90df2e0153109cc83cf95f75042272", + strip_prefix = "cloud.google.com/go/eventarc@v1.13.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/eventarc/com_google_cloud_go_eventarc-v1.13.4.zip", ], ) go_repository( name = "com_google_cloud_go_filestore", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/filestore", - sha256 = "d2e3f0e37f7ddf815e954ce77620b0e42b4f6c99c1ba58969f534e6ad0be4908", - strip_prefix = "cloud.google.com/go/filestore@v1.8.2", + sha256 = "b1a9002fa292bc485ab496718e29bcf5ecccf60ace73138b104649a13ebf1e5a", + strip_prefix = 
"cloud.google.com/go/filestore@v1.8.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/filestore/com_google_cloud_go_filestore-v1.8.1.zip", ], ) go_repository( name = "com_google_cloud_go_firestore", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/firestore", - sha256 = "580cc787e73def539d8b567876634758195d5d269d9fc7ec701fe488da0d7edd", - strip_prefix = "cloud.google.com/go/firestore@v1.15.0", + sha256 = "426e3589567d5b7bea9f7936863b4fe9fc7172029afc2b03cded5f69bcf3baf2", + strip_prefix = "cloud.google.com/go/firestore@v1.14.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.15.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.15.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.15.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.15.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.14.0.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.14.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.14.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/firestore/com_google_cloud_go_firestore-v1.14.0.zip", ], ) go_repository( name = "com_google_cloud_go_functions", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/functions", - sha256 = "fd40cb7094ad39a66fa48388476ba96cb40659437133a2d72d37ea8b9b6a61e1", - strip_prefix = "cloud.google.com/go/functions@v1.16.1", + sha256 = "6c5dd0e47056107770ea8c0a278803161fac4ac4bb4357aef5c40c6c8b5f5e44", + strip_prefix = "cloud.google.com/go/functions@v1.16.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/functions/com_google_cloud_go_functions-v1.16.0.zip", ], ) go_repository( name = "com_google_cloud_go_gkebackup", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/gkebackup", - sha256 = "45c2933f547c9a387f15a223240278e7ad34ec249a2bbaf22449ead192885dee", - strip_prefix = "cloud.google.com/go/gkebackup@v1.4.0", + sha256 = 
"7617734c17dd1ef31b84691b910001187d48fa88858a1d6147a7f3f192c5283c", + strip_prefix = "cloud.google.com/go/gkebackup@v1.3.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.4.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.4.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.4.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.4.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.3.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.3.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.3.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkebackup/com_google_cloud_go_gkebackup-v1.3.5.zip", ], ) go_repository( name = "com_google_cloud_go_gkeconnect", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/gkeconnect", - sha256 = "54ca0fa601687b58da2361344be341be5bc44373a3a0b21b6562d7225917fc29", - strip_prefix = "cloud.google.com/go/gkeconnect@v0.8.6", + sha256 = "e2826d1bfb49f0958d9d39117e32e18f910fe85adad4e40a35956da8a84d9e53", + strip_prefix = "cloud.google.com/go/gkeconnect@v0.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkeconnect/com_google_cloud_go_gkeconnect-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_gkehub", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/gkehub", - sha256 = "e3f2300886ed800b820f592f63968829a0b52fe78e0cc4bcebe2b52b0aba311f", - strip_prefix = "cloud.google.com/go/gkehub@v0.14.6", + sha256 = "753d6f2b9a22a87bff6fabc8ce751b2c149368bffb430cd258d7630d67a5fc1b", + strip_prefix = "cloud.google.com/go/gkehub@v0.14.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkehub/com_google_cloud_go_gkehub-v0.14.5.zip", ], ) go_repository( name = "com_google_cloud_go_gkemulticloud", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/gkemulticloud", - sha256 = "85bbe6ca4d34043e49eec2e3527acb6d9b4d23cd38c65710d3498b3d901cdee4", - 
strip_prefix = "cloud.google.com/go/gkemulticloud@v1.1.2", + sha256 = "a33995596063889a8b166cad7bc6a327a12ec6cde1ba5c1b75cf4598469d7592", + strip_prefix = "cloud.google.com/go/gkemulticloud@v1.1.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gkemulticloud/com_google_cloud_go_gkemulticloud-v1.1.1.zip", ], ) go_repository( name = "com_google_cloud_go_gsuiteaddons", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/gsuiteaddons", - sha256 = "323362296193aa3881290aa04ea9325f8bd4c3e93c5c5edc7042b0e9881171fc", - strip_prefix = "cloud.google.com/go/gsuiteaddons@v1.6.6", + sha256 = "b43bd8eb7d8781aea96e06527905845fe04c1715da6b8b41342232725ef3d871", + strip_prefix = "cloud.google.com/go/gsuiteaddons@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/gsuiteaddons/com_google_cloud_go_gsuiteaddons-v1.6.5.zip", ], ) go_repository( @@ -8430,39 +8417,39 @@ def go_deps(): name = "com_google_cloud_go_iap", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/iap", - sha256 = "14f98a19725eca0f0aef1ffec6f82aa478198044b0dd1ab5ba16e754596fd2a9", - strip_prefix = "cloud.google.com/go/iap@v1.9.5", + sha256 = "923456340072c0cb9deffeb221c8bf2e67f3404cb652159238dca9b962cc7a82", + strip_prefix = "cloud.google.com/go/iap@v1.9.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/iap/com_google_cloud_go_iap-v1.9.4.zip", ], ) go_repository( name = "com_google_cloud_go_ids", build_file_proto_mode = "disable_global", importpath = 
"cloud.google.com/go/ids", - sha256 = "0e0d05e750c8f39c50c12dee1fa233a518c0bc9688fb3369233408e51fbacde9", - strip_prefix = "cloud.google.com/go/ids@v1.4.6", + sha256 = "2ee442696e20e1fe380b48f45d458fcd38ae0a187bb66264a1b104d104024cce", + strip_prefix = "cloud.google.com/go/ids@v1.4.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/ids/com_google_cloud_go_ids-v1.4.5.zip", ], ) go_repository( name = "com_google_cloud_go_iot", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/iot", - sha256 = "c16b70bfdb510970632c37e30a71cd0da144249a4941ed5c81135c6e3aa9a75c", - strip_prefix = "cloud.google.com/go/iot@v1.7.6", + sha256 = "7727fc21d7400157c0753d1fb90d85cbd101a3db6ae665d540b52d74bc2b3a15", + strip_prefix = "cloud.google.com/go/iot@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/iot/com_google_cloud_go_iot-v1.7.5.zip", ], ) go_repository( @@ -8482,26 +8469,26 @@ def go_deps(): name = "com_google_cloud_go_language", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/language", - sha256 = "4f67344bfd44548f07bbf193cf145b4e277e3905c50259898ee37d60b855cb1a", - strip_prefix = "cloud.google.com/go/language@v1.12.4", + sha256 = "d7def4827c112b93ae2da079244155dc631871b0c460e3c309d8e2c23cea6fd5", + strip_prefix = "cloud.google.com/go/language@v1.12.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/language/com_google_cloud_go_language-v1.12.3.zip", ], ) go_repository( name = "com_google_cloud_go_lifesciences", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/lifesciences", - sha256 = 
"299df4b8a782aa1e4917e86c3cc9fcd8d9697ae3156e07df08d87bdc618b52da", - strip_prefix = "cloud.google.com/go/lifesciences@v0.9.6", + sha256 = "f0a13c8842d12f7766eb5ae855051db836b46fbcb7ff799b7ab4e29e1880e484", + strip_prefix = "cloud.google.com/go/lifesciences@v0.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/lifesciences/com_google_cloud_go_lifesciences-v0.9.5.zip", ], ) go_repository( @@ -8521,247 +8508,247 @@ def go_deps(): name = "com_google_cloud_go_longrunning", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/longrunning", - sha256 = "3a2d8b2bee619ed686d94fae5f99c8fca7eb69e5343892561c1b89816d6d1297", - strip_prefix = "cloud.google.com/go/longrunning@v0.5.6", + sha256 = "d7c32818f6ca09c7d5c8dfc423b2e37d8b45a0d257e5483b12eceef40f2ad29e", + strip_prefix = "cloud.google.com/go/longrunning@v0.5.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.6.zip", - 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/longrunning/com_google_cloud_go_longrunning-v0.5.5.zip", ], ) go_repository( name = "com_google_cloud_go_managedidentities", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/managedidentities", - sha256 = "de12cf5b96770e3fd5af32f0500a8a6f3464bb133bdb1bfaa5583ce617631532", - strip_prefix = "cloud.google.com/go/managedidentities@v1.6.6", + sha256 = "f7c3629ff5dd4f8303e2a4e4460323025435bc1fb9cbfce5795380fcbcc71863", + strip_prefix = "cloud.google.com/go/managedidentities@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.5.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/managedidentities/com_google_cloud_go_managedidentities-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_maps", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/maps", - sha256 = "efbceffb02f1d34a36af4eeb9817045d20c85be929329ba511b0929b6abab86e", - strip_prefix = "cloud.google.com/go/maps@v1.7.1", + sha256 = "f8f673a9a144e985a661a16ab9d1000b2cac9e3f5f75b2678e012c0d599389f6", + strip_prefix = "cloud.google.com/go/maps@v1.6.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.7.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.7.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.7.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.7.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.6.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.6.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.6.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/maps/com_google_cloud_go_maps-v1.6.4.zip", ], ) go_repository( name = "com_google_cloud_go_mediatranslation", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/mediatranslation", - sha256 = "7cba5bc0d01da1525d13f5b5cf02d775ad263e0575353db7f48dcf1e2b97fb91", - strip_prefix = "cloud.google.com/go/mediatranslation@v0.8.6", + sha256 = "ad4d59c5d1fd43153f62c7955a8b079fc50395c34026df1215c04722234b2d4c", + strip_prefix = "cloud.google.com/go/mediatranslation@v0.8.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/mediatranslation/com_google_cloud_go_mediatranslation-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_memcache", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/memcache", - sha256 = "e24961deeb8ca81c3647409b397fd69835565ab1323e18cc73d6f957d671697c", - strip_prefix = "cloud.google.com/go/memcache@v1.10.6", + sha256 = "3d21ca1f735630b714ede58fa46833157d5c96d0a9ab1b47572a13d1fcc62c65", + strip_prefix = "cloud.google.com/go/memcache@v1.10.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/memcache/com_google_cloud_go_memcache-v1.10.5.zip", ], ) go_repository( name = "com_google_cloud_go_metastore", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/metastore", - sha256 = "21efcf2b4458c70931f198d0b71ef215f9beca53bd50df2c7b7e7336f99ffdc8", - strip_prefix = "cloud.google.com/go/metastore@v1.13.5", + sha256 = "3be4c42d5698194020364a0d7e2c9ee4b84140d9206ffdd3c46923f1e6e8405a", + strip_prefix = "cloud.google.com/go/metastore@v1.13.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/metastore/com_google_cloud_go_metastore-v1.13.4.zip", ], ) go_repository( name = "com_google_cloud_go_monitoring", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/monitoring", - sha256 = 
"bba439d0b7a5b56c115aec2c8bcddda9694076e6be79503bf5ed2fd21e2daf14", - strip_prefix = "cloud.google.com/go/monitoring@v1.18.1", + sha256 = "c16947177048b8b5a0eb0736979cf067deec7aeea95405ac698ebf49da5204d6", + strip_prefix = "cloud.google.com/go/monitoring@v1.18.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/monitoring/com_google_cloud_go_monitoring-v1.18.0.zip", ], ) go_repository( name = "com_google_cloud_go_networkconnectivity", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/networkconnectivity", - sha256 = "86f8ff1a77fa0defd9b7dd9286cbfd91eef06fe5e1c1720582c0ce094150f5ad", - strip_prefix = "cloud.google.com/go/networkconnectivity@v1.14.5", + sha256 = "16094a054c49752b68585d5500370fd9d7f742470c0c26aefb8040b3d20023a1", + strip_prefix = "cloud.google.com/go/networkconnectivity@v1.14.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.5.zip", - 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networkconnectivity/com_google_cloud_go_networkconnectivity-v1.14.4.zip", ], ) go_repository( name = "com_google_cloud_go_networkmanagement", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/networkmanagement", - sha256 = "c7a2ef332b414a90e364e619a8757ad013c76aea4953c4fdf6a9495f98f3c50a", - strip_prefix = "cloud.google.com/go/networkmanagement@v1.13.0", + sha256 = "08e6997d0b3ef0f6ae7f9fedf3dbf0dc4df0ca37ac48c0620def842a7b9b0ac4", + strip_prefix = "cloud.google.com/go/networkmanagement@v1.9.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.13.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.13.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.13.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.13.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.9.4.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.9.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.9.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networkmanagement/com_google_cloud_go_networkmanagement-v1.9.4.zip", ], ) go_repository( name = "com_google_cloud_go_networksecurity", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/networksecurity", - sha256 = "601ba38dbd21349c96d591aeae55b5eeaed23dc958f389108e3d635bd2cc9e10", - strip_prefix = "cloud.google.com/go/networksecurity@v0.9.6", + sha256 = "9fe395a99c14c2900363e97abd35140513d0501477dc8ff925d083093ee61c3c", + strip_prefix = "cloud.google.com/go/networksecurity@v0.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/networksecurity/com_google_cloud_go_networksecurity-v0.9.5.zip", ], ) go_repository( name = "com_google_cloud_go_notebooks", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/notebooks", 
- sha256 = "481b201b21d54273a46f3e07d6266820e245e5317320490fe56511a8a955f41d", - strip_prefix = "cloud.google.com/go/notebooks@v1.11.4", + sha256 = "eb348f5082ae07532f6340963fd526920323909948e3d2a478a1c0ed60532a05", + strip_prefix = "cloud.google.com/go/notebooks@v1.11.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/notebooks/com_google_cloud_go_notebooks-v1.11.3.zip", ], ) go_repository( name = "com_google_cloud_go_optimization", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/optimization", - sha256 = "2e5a7bf8525f76d96d49a816e7b87673d26fe3ad2fb55252a8cda9776d512a73", - strip_prefix = "cloud.google.com/go/optimization@v1.6.4", + sha256 = "23cb4effc3aa771483f2e99eee5eed014461a4f7931be408c87b1f1cfad1304c", + strip_prefix = "cloud.google.com/go/optimization@v1.6.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.4.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/optimization/com_google_cloud_go_optimization-v1.6.3.zip", ], ) go_repository( name = "com_google_cloud_go_orchestration", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/orchestration", - sha256 = "7cbcfba68f98ace3a83d27b5a21649fc5c1c6313f2ae865f69d34092701a56ab", - strip_prefix = "cloud.google.com/go/orchestration@v1.9.1", + sha256 = "3581411e89ce4af44eeb09c6c7a2fcbbeb37e8b00c3d63ecdbafcc6a1ba48557", + strip_prefix = "cloud.google.com/go/orchestration@v1.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.9.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.9.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.9.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.9.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.8.5.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/orchestration/com_google_cloud_go_orchestration-v1.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_orgpolicy", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/orgpolicy", - sha256 = "34185074647cfbb6753c1e557584b3e8191e3b61e0d5c45063530cd4f3894e3f", - strip_prefix = "cloud.google.com/go/orgpolicy@v1.12.2", + sha256 = "a0ea6ba027808aa1c7d90b066f47c1df38e22ac8a953ad85b8efeaa8c79a22e6", + strip_prefix = "cloud.google.com/go/orgpolicy@v1.12.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/orgpolicy/com_google_cloud_go_orgpolicy-v1.12.1.zip", ], ) go_repository( name = "com_google_cloud_go_osconfig", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/osconfig", - sha256 = "3ec14ddf4a1b4a5b9b86f4642baf38c3f9fc3c7f6e95b023a604f60947e1012e", - strip_prefix = "cloud.google.com/go/osconfig@v1.12.6", + sha256 = "02bf95f2522727ab882a9028c734ea6fd9cfe962846923b17b1579e9da7404a3", + strip_prefix = "cloud.google.com/go/osconfig@v1.12.5", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/osconfig/com_google_cloud_go_osconfig-v1.12.5.zip", ], ) go_repository( name = "com_google_cloud_go_oslogin", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/oslogin", - sha256 = "750257ef37ea0c52653856873b54af8510b8a9d8613ab375abed9e48395a087d", - strip_prefix = "cloud.google.com/go/oslogin@v1.13.2", + sha256 = "ca28cd9210922f2e9abd9aa283eea775060ef02f4167e3cc56bb8d92aa453c57", + strip_prefix = "cloud.google.com/go/oslogin@v1.13.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.1.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/oslogin/com_google_cloud_go_oslogin-v1.13.1.zip", ], ) go_repository( name = "com_google_cloud_go_phishingprotection", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/phishingprotection", - sha256 = "6df6a3827196206a56a7d19b82b206a17682f3cccd36b614da0a5c6c2f2e7f0b", - strip_prefix = "cloud.google.com/go/phishingprotection@v0.8.6", + sha256 = "98951639118b05caf30d9320c39c285f0cbe224c6bde63fb39acf42c4f9bbf86", + strip_prefix = "cloud.google.com/go/phishingprotection@v0.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/phishingprotection/com_google_cloud_go_phishingprotection-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_policytroubleshooter", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/policytroubleshooter", - sha256 = 
"d4b3123612454faee6ac9ef88e0d781b918a95d6645fe7be3f22849676ab83a3", - strip_prefix = "cloud.google.com/go/policytroubleshooter@v1.10.4", + sha256 = "96585f3dd465551c1ba5800b2c6a1f78dbb12e47219a96ece01e0113b3e56718", + strip_prefix = "cloud.google.com/go/policytroubleshooter@v1.10.3", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.4.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.4.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.4.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.4.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.3.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.3.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.3.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/policytroubleshooter/com_google_cloud_go_policytroubleshooter-v1.10.3.zip", ], ) go_repository( name = "com_google_cloud_go_privatecatalog", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/privatecatalog", - sha256 = "e9c8e23974367cf360e0ee17bc55f3cf7e20f8fb6932cee50250755cb973df25", - strip_prefix = "cloud.google.com/go/privatecatalog@v0.9.6", + sha256 = "25b7b30d8d7be00bad226d82dc456fe19476b7323510454de227a8665fd19041", + strip_prefix = "cloud.google.com/go/privatecatalog@v0.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.6.zip", - 
"http://ats.apps.svc/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/privatecatalog/com_google_cloud_go_privatecatalog-v0.9.5.zip", ], ) go_repository( @@ -8794,208 +8781,208 @@ def go_deps(): name = "com_google_cloud_go_recaptchaenterprise_v2", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/recaptchaenterprise/v2", - sha256 = "924a9dfdd5a18ceff030b3fd1c7294770a754f07d6fd327a533cad13dd62cd5a", - strip_prefix = "cloud.google.com/go/recaptchaenterprise/v2@v2.12.0", + sha256 = "e83e1e652020604e58b36821cda9c9ab7fc1487c9376542a474ecbfd7f78d2db", + strip_prefix = "cloud.google.com/go/recaptchaenterprise/v2@v2.9.2", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.12.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.12.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.12.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.12.0.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.9.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.9.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.9.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recaptchaenterprise/v2/com_google_cloud_go_recaptchaenterprise_v2-v2.9.2.zip", ], ) go_repository( name = "com_google_cloud_go_recommendationengine", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/recommendationengine", - sha256 = "607da8589ff31ec8822c4ac3dcc2176d4888cb4601fcc6b2d7718234ee579118", - strip_prefix = "cloud.google.com/go/recommendationengine@v0.8.6", + sha256 = "7b3a14bf4dda969087b94a195a3341d8a340b19b4200ae69745c1b72f25208eb", + strip_prefix = "cloud.google.com/go/recommendationengine@v0.8.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.5.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recommendationengine/com_google_cloud_go_recommendationengine-v0.8.5.zip", ], ) go_repository( name = "com_google_cloud_go_recommender", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/recommender", - sha256 = "9ac2adc5ec13ef57b64e20160e8704685b660190cd46a25728f96a298503fdb0", - strip_prefix = "cloud.google.com/go/recommender@v1.12.2", + sha256 = "b8e31a6c511bd19d5cc6d07029a1d93a76199c8536539b3850048c479a4b2d59", + strip_prefix = "cloud.google.com/go/recommender@v1.12.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/recommender/com_google_cloud_go_recommender-v1.12.1.zip", ], ) go_repository( name = "com_google_cloud_go_redis", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/redis", - sha256 = "4325ff29f55cb8511756f32ef2b6f02ffeda2371dad7f7e5efe4d4fb4a5b04e9", - strip_prefix = "cloud.google.com/go/redis@v1.14.3", + sha256 = "2ad92f1fe9d4b8e3e2342e45dd868843e34c6e6020447045efa8f4cdf4b14bc9", + strip_prefix = "cloud.google.com/go/redis@v1.14.2", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.3.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.3.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.3.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.3.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.2.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.2.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/redis/com_google_cloud_go_redis-v1.14.2.zip", ], ) go_repository( name = "com_google_cloud_go_resourcemanager", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/resourcemanager", - sha256 = "65788e8dad280b1afe3d09c37cd2873be7c2d9e3b4b4e37ea04ec6dea917f6d6", - strip_prefix = "cloud.google.com/go/resourcemanager@v1.9.6", + sha256 = "8b78a11c34c7d82a72e346475e26f980f3b82419bfd74c94138b8c69ff50b325", + strip_prefix = "cloud.google.com/go/resourcemanager@v1.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.5.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/resourcemanager/com_google_cloud_go_resourcemanager-v1.9.5.zip", ], ) go_repository( name = "com_google_cloud_go_resourcesettings", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/resourcesettings", - sha256 = "56e7da8a780260123ea423b628918d0e807603072684d5785d759ec250e5fd7d", - strip_prefix = "cloud.google.com/go/resourcesettings@v1.6.6", + sha256 = "73e8418040ec80303675503371c53980c5d840dd5b77feee60f128a9070bf794", + strip_prefix = "cloud.google.com/go/resourcesettings@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/resourcesettings/com_google_cloud_go_resourcesettings-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_retail", build_file_proto_mode = "disable_global", importpath = 
"cloud.google.com/go/retail", - sha256 = "5a3b6e9ba0c4f8221d171fd43d60a39a1876b86c238568f9f1954452e0434401", - strip_prefix = "cloud.google.com/go/retail@v1.16.1", + sha256 = "a1cc280566f55e027eb7bc746f7c5a37e7a0ec5659adbde34959275fc9a45b56", + strip_prefix = "cloud.google.com/go/retail@v1.16.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/retail/com_google_cloud_go_retail-v1.16.0.zip", ], ) go_repository( name = "com_google_cloud_go_run", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/run", - sha256 = "478517efe84510683418caabcb31475281106bfa7b047ae85ac3acd28075139c", - strip_prefix = "cloud.google.com/go/run@v1.3.6", + sha256 = "932edcab991d8ed35085a57444cc4d27585ee98ca6c927ca93a333f7f119725d", + strip_prefix = "cloud.google.com/go/run@v1.3.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/run/com_google_cloud_go_run-v1.3.4.zip", ], ) go_repository( name = "com_google_cloud_go_scheduler", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/scheduler", - sha256 = "5e44f558c0aa29efea7ecdb859474d6aad2947366b89e42f02e723513068743c", - strip_prefix = "cloud.google.com/go/scheduler@v1.10.7", + sha256 = "77ddd0298d34b30fa48df896899a1f928fe01b22220d4ca64fffd0a1d56ee50c", + strip_prefix = "cloud.google.com/go/scheduler@v1.10.6", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.7.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.7.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.7.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.7.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.6.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.6.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/scheduler/com_google_cloud_go_scheduler-v1.10.6.zip", ], ) go_repository( name = "com_google_cloud_go_secretmanager", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/secretmanager", - sha256 = "7e152ee11f0dee66c40e009127ba823d4b755eefa0a0b44d7bdc80636caf5e41", - 
strip_prefix = "cloud.google.com/go/secretmanager@v1.12.0", + sha256 = "e3f0000863cc9944a97ebd4004b6cde6fa2484233cd12e1741506428c8265ca3", + strip_prefix = "cloud.google.com/go/secretmanager@v1.11.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.12.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.12.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.12.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.12.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.11.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.11.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.11.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/secretmanager/com_google_cloud_go_secretmanager-v1.11.5.zip", ], ) go_repository( name = "com_google_cloud_go_security", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/security", - sha256 = "30614f4b20cd36d6394f1bc60e2380b7dab3449e28320a6856fc0019d267e05d", - strip_prefix = "cloud.google.com/go/security@v1.15.6", + sha256 = "f4dd23e113cad47462715d654c95de55c1c890b37cca8c79b47bb5a7c0ec9417", + strip_prefix = "cloud.google.com/go/security@v1.15.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/security/com_google_cloud_go_security-v1.15.5.zip", ], ) go_repository( name = "com_google_cloud_go_securitycenter", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/securitycenter", - sha256 = "1c427560d47ba37c5871e5af8f84d7fd86a796c39f6f5c8ca0b888fea778bcb1", - strip_prefix = "cloud.google.com/go/securitycenter@v1.28.0", + sha256 = "2d465bd4173e7c5f7e2b395797d0053175d2501cd1c282d801f8f11cb29c03d4", + strip_prefix = "cloud.google.com/go/securitycenter@v1.24.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.28.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.28.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.28.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.28.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.24.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.24.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.24.4.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/securitycenter/com_google_cloud_go_securitycenter-v1.24.4.zip", ], ) go_repository( name = "com_google_cloud_go_servicedirectory", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/servicedirectory", - sha256 = "40240f7a666c469f7aea7dab67aa38e1554dc6f6ed15ec7c618918b5208e3106", - strip_prefix = "cloud.google.com/go/servicedirectory@v1.11.5", + sha256 = "ab4aeaa7d371f1458dc3b295c9ecf712a35b4d2d853b4d4fb9192454e70815fb", + strip_prefix = "cloud.google.com/go/servicedirectory@v1.11.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/servicedirectory/com_google_cloud_go_servicedirectory-v1.11.4.zip", ], ) go_repository( name = "com_google_cloud_go_shell", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/shell", - sha256 = "258bca8529cdaca4482e7fda6374f29dc2e5f60719cb85e8e7e4b66c09042ac8", - strip_prefix = "cloud.google.com/go/shell@v1.7.6", + sha256 = "28fea75e78add4a619d4ac65fdfcef1577599c20310e82ab884c686ace14021d", + strip_prefix = 
"cloud.google.com/go/shell@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/shell/com_google_cloud_go_shell-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_spanner", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/spanner", - sha256 = "92a6177e00c13a9abb0abc84a0908999e58870ac3ef56dfcd6aedbf488c121d8", - strip_prefix = "cloud.google.com/go/spanner@v1.60.0", + sha256 = "869ec23f371ad0565a1fe89933ed34ff76f5b673fdb7c225cfc4305e78637a90", + strip_prefix = "cloud.google.com/go/spanner@v1.57.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.60.0.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.60.0.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.60.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.60.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.57.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.57.0.zip", + 
"https://cache.hawkingrei.com/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.57.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/spanner/com_google_cloud_go_spanner-v1.57.0.zip", ], ) go_repository( name = "com_google_cloud_go_speech", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/speech", - sha256 = "74a45f528fe5fc655f853b47bba22097ab873eb08576b2f57da44ca207e29e1d", - strip_prefix = "cloud.google.com/go/speech@v1.22.1", + sha256 = "2f1a1127cf13f85d2975f91f4296f43d59fc14273177aade2909bc94a4bbf358", + strip_prefix = "cloud.google.com/go/speech@v1.21.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.22.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.22.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.22.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.22.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.21.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.21.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.21.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/speech/com_google_cloud_go_speech-v1.21.1.zip", ], ) go_repository( @@ -9015,195 +9002,195 @@ def go_deps(): name = "com_google_cloud_go_storagetransfer", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/storagetransfer", - sha256 = "9ebee1dadb761bd2ec52915ef2cbd876b139463ef1b2e9e0da9bcfaa5a63efad", - strip_prefix = "cloud.google.com/go/storagetransfer@v1.10.5", + sha256 = "4a9f5d532a1a8c52f16428e137a4c0fca6c23f2583a8526f83f4e033a9edf9a1", + strip_prefix = 
"cloud.google.com/go/storagetransfer@v1.10.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/storagetransfer/com_google_cloud_go_storagetransfer-v1.10.4.zip", ], ) go_repository( name = "com_google_cloud_go_talent", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/talent", - sha256 = "5d9acef2e8037d33c8f5c7629529192e021a292a5acf673da77e4133511adc3c", - strip_prefix = "cloud.google.com/go/talent@v1.6.7", + sha256 = "e07557cef01010fff6183a646bdf3fbad238efd6e111f614302edc74f60de896", + strip_prefix = "cloud.google.com/go/talent@v1.6.6", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.7.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.7.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.7.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.7.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.6.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.6.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.6.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/talent/com_google_cloud_go_talent-v1.6.6.zip", ], ) go_repository( name = "com_google_cloud_go_texttospeech", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/texttospeech", - sha256 = "362cc2223276ae56bc1045f8e95e6ae88feac8749063a816bee899262e8562f7", - strip_prefix = "cloud.google.com/go/texttospeech@v1.7.6", + sha256 = "c136104322364aedd222839505fdca0142d3cc1d14d9a50a40ee0be2d9966fc7", + strip_prefix = "cloud.google.com/go/texttospeech@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/texttospeech/com_google_cloud_go_texttospeech-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_tpu", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/tpu", - sha256 = 
"7e48b694da01ab77305740c26aadf803f0062afc338e9f7e595022eb83ae4e74", - strip_prefix = "cloud.google.com/go/tpu@v1.6.6", + sha256 = "a5e0671eec0aca712a9dcc697e6b6c5bc89d4897aca092f2d5a2531152bcdf06", + strip_prefix = "cloud.google.com/go/tpu@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/tpu/com_google_cloud_go_tpu-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_trace", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/trace", - sha256 = "3f5a9f90b85b2495723861237b2eb5b68553103cd4b75c0d121182cc31c6e01d", - strip_prefix = "cloud.google.com/go/trace@v1.10.6", + sha256 = "74c62f0ced3cae41b2b0a33036d0f0dfc005e4a3c598b9f977f832095a477499", + strip_prefix = "cloud.google.com/go/trace@v1.10.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.6.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/trace/com_google_cloud_go_trace-v1.10.5.zip", ], ) go_repository( name = "com_google_cloud_go_translate", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/translate", - sha256 = "78f8a4005f3b12eef217ad516a525d5eec41654fdf82fb2f41d73004214fdb3d", - strip_prefix = "cloud.google.com/go/translate@v1.10.2", + sha256 = "400320ff3f535f32ab8a4b7f71283c3f7819eb9ac2c7917453e62554eee65a3f", + strip_prefix = "cloud.google.com/go/translate@v1.10.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/translate/com_google_cloud_go_translate-v1.10.1.zip", ], ) go_repository( name = "com_google_cloud_go_video", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/video", - sha256 = 
"780e7ab4bd303e5beaf6e3ab82e3c7d1bfa081bf01853c47aa7e3f940212130a", - strip_prefix = "cloud.google.com/go/video@v1.20.5", + sha256 = "8ad94a57f03f2063d8d13fdbecb7dcd5e0f477539955de36906ea0bd14f4a76f", + strip_prefix = "cloud.google.com/go/video@v1.20.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.5.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/video/com_google_cloud_go_video-v1.20.4.zip", ], ) go_repository( name = "com_google_cloud_go_videointelligence", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/videointelligence", - sha256 = "9961319a5aaeb6ffd1df671dcf81941d8ff8f5f35440e17e434148c8586e9d7b", - strip_prefix = "cloud.google.com/go/videointelligence@v1.11.6", + sha256 = "f8b6aa7f16bf09f1b581e9689b83ab3b3310397c38f48eed42c212106df5c0fd", + strip_prefix = "cloud.google.com/go/videointelligence@v1.11.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.6.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/videointelligence/com_google_cloud_go_videointelligence-v1.11.5.zip", ], ) go_repository( name = "com_google_cloud_go_vision_v2", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/vision/v2", - sha256 = "2ae5b457cfe0c3efe0e0efb88724131719bc9702120fb5bc8e15f5ece6476a15", - strip_prefix = "cloud.google.com/go/vision/v2@v2.8.1", + sha256 = "c76bd66ad2b51b7e0893605e58439003d29390398596114df6a2dab34b39ebda", + strip_prefix = "cloud.google.com/go/vision/v2@v2.8.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.1.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.1.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.0.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.0.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.0.zip", + 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vision/v2/com_google_cloud_go_vision_v2-v2.8.0.zip", ], ) go_repository( name = "com_google_cloud_go_vmmigration", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/vmmigration", - sha256 = "d5705cb299663a4a0dff323d19d72061dac7569182a94f8b304cafc270440034", - strip_prefix = "cloud.google.com/go/vmmigration@v1.7.6", + sha256 = "4488c36b2324ef7a3c6aee1075bb13767a43ea4de12509d593a5ae168fa71513", + strip_prefix = "cloud.google.com/go/vmmigration@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vmmigration/com_google_cloud_go_vmmigration-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_vmwareengine", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/vmwareengine", - sha256 = "1819e47ae928ab638a0f9568e6db446a8ef5a7ff61a653b9103d8fa7471344d4", - strip_prefix = "cloud.google.com/go/vmwareengine@v1.1.2", + sha256 = "6766d871cf5cca252b3d98e138e8527374cefbca747a1530062cfebe31f3ae8e", + strip_prefix = "cloud.google.com/go/vmwareengine@v1.1.1", urls = [ - 
"http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.2.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.2.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.2.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.2.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.1.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.1.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vmwareengine/com_google_cloud_go_vmwareengine-v1.1.1.zip", ], ) go_repository( name = "com_google_cloud_go_vpcaccess", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/vpcaccess", - sha256 = "6eb319c339a46e400454a2fd2b12f3d0f06b8586bcb2e2f14476121d9f8899bc", - strip_prefix = "cloud.google.com/go/vpcaccess@v1.7.6", + sha256 = "d1aae1f25f3efe5e4f08e4f0c485d2fa839cf9f221ce87ddc815910c8e68c7db", + strip_prefix = "cloud.google.com/go/vpcaccess@v1.7.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.5.zip", + 
"http://ats.apps.svc/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/vpcaccess/com_google_cloud_go_vpcaccess-v1.7.5.zip", ], ) go_repository( name = "com_google_cloud_go_webrisk", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/webrisk", - sha256 = "7a860fd7409921f4d3958f1aca986876b9159243baff6d03e33478c39b1c69a0", - strip_prefix = "cloud.google.com/go/webrisk@v1.9.6", + sha256 = "1fc8a54fc71a78c9b34bca71c8e464831c63f8b745ee729305b94a69a9c94579", + strip_prefix = "cloud.google.com/go/webrisk@v1.9.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/webrisk/com_google_cloud_go_webrisk-v1.9.5.zip", ], ) go_repository( name = "com_google_cloud_go_websecurityscanner", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/websecurityscanner", - sha256 = "30b8ae174a209f185a30ad5d5c273b76610400ed0f8ff2cbf5e880281a094182", - strip_prefix = "cloud.google.com/go/websecurityscanner@v1.6.6", + sha256 = 
"40e8fabb14645bf3c5dd8e31791ae4afe55b5c7245d460ff7cd8d6f1d169ea2f", + strip_prefix = "cloud.google.com/go/websecurityscanner@v1.6.5", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.6.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.6.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.6.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.5.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.5.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.5.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/websecurityscanner/com_google_cloud_go_websecurityscanner-v1.6.5.zip", ], ) go_repository( name = "com_google_cloud_go_workflows", build_file_proto_mode = "disable_global", importpath = "cloud.google.com/go/workflows", - sha256 = "f0bf18ee4ac63f1ffa00729a9e231a53cb6db3d04d33efa4f1b7986ebcfdddf0", - strip_prefix = "cloud.google.com/go/workflows@v1.12.5", + sha256 = "624d1d4936eebf8b2ab6e4435002b488a96b6c1b920bfd3466b2b052ff3e4d12", + strip_prefix = "cloud.google.com/go/workflows@v1.12.4", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.5.zip", - "http://ats.apps.svc/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.5.zip", - "https://cache.hawkingrei.com/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.5.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.5.zip", + "http://bazel-cache.pingcap.net:8080/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.4.zip", + "http://ats.apps.svc/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.4.zip", + "https://cache.hawkingrei.com/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/cloud.google.com/go/workflows/com_google_cloud_go_workflows-v1.12.4.zip", ], ) go_repository( @@ -9960,19 +9947,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/codeberg.org/chavacava/garif/org_codeberg_chavacava_garif-v0.2.0.zip", ], ) - go_repository( - name = "org_gioui", - build_file_proto_mode = "disable_global", - importpath = "gioui.org", - sha256 = "fcbab2a0ea09ff775c1ff4fa99299d95b94aad496b1ac329e3c7389119168fc0", - strip_prefix = "gioui.org@v0.0.0-20210308172011-57750fc8a0a6", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "http://ats.apps.svc/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "https://cache.hawkingrei.com/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/gioui.org/org_gioui-v0.0.0-20210308172011-57750fc8a0a6.zip", - ], - ) go_repository( name = "org_go_simpler_musttag", build_file_proto_mode = "disable_global", @@ -10029,26 +10003,26 @@ def go_deps(): name = "org_golang_google_genproto", build_file_proto_mode = "disable_global", importpath = "google.golang.org/genproto", - sha256 = "9e02a5cd403a29d32b296c66a0a00bd607cd856c434a7c91f4759c5f2fd89d9c", - strip_prefix = "google.golang.org/genproto@v0.0.0-20240401170217-c3f982113cda", + sha256 = "d623f4156476f85beae7e6ed60fd7ea8a6202029074eb391a39252f3ed7d380d", + strip_prefix = 
"google.golang.org/genproto@v0.0.0-20240227224415-6ceb2ff114de", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240401170217-c3f982113cda.zip", - "http://ats.apps.svc/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240401170217-c3f982113cda.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240401170217-c3f982113cda.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240401170217-c3f982113cda.zip", + "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240227224415-6ceb2ff114de.zip", + "http://ats.apps.svc/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240227224415-6ceb2ff114de.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240227224415-6ceb2ff114de.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/org_golang_google_genproto-v0.0.0-20240227224415-6ceb2ff114de.zip", ], ) go_repository( name = "org_golang_google_genproto_googleapis_api", build_file_proto_mode = "disable_global", importpath = "google.golang.org/genproto/googleapis/api", - sha256 = "956715d2f83c3ac6ba23c0e85494973c3d66ac375719655864cb5351746856f4", - strip_prefix = "google.golang.org/genproto/googleapis/api@v0.0.0-20240401170217-c3f982113cda", + sha256 = "7a24304baa150f3e64521242491823738fa6e9bd4bd85acf6e79c1cd6ebd847f", + strip_prefix = "google.golang.org/genproto/googleapis/api@v0.0.0-20240318140521-94a12d6c2237", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240401170217-c3f982113cda.zip", - 
"http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240401170217-c3f982113cda.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240401170217-c3f982113cda.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240401170217-c3f982113cda.zip", + "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240318140521-94a12d6c2237.zip", + "http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240318140521-94a12d6c2237.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240318140521-94a12d6c2237.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/api/org_golang_google_genproto_googleapis_api-v0.0.0-20240318140521-94a12d6c2237.zip", ], ) go_repository( diff --git a/go.mod b/go.mod index 862391bb98e30..0678315b1c079 100644 --- a/go.mod +++ b/go.mod @@ -165,7 +165,7 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect diff --git a/go.sum b/go.sum index 85138e8dc83f8..b727754e8e2d0 100644 --- a/go.sum +++ b/go.sum @@ -497,14 +497,8 @@ github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYW github.com/klauspost/cpuid 
v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= -<<<<<<< HEAD github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= -======= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= -github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= ->>>>>>> master github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -937,14 +931,8 @@ golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EH golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= -<<<<<<< HEAD -golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac h1:TSSpLIG4v+p0rPv1pNOQtl1I8knsO4S9trOxNMOLVP4= -golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= -======= golang.org/x/exp/typeparams v0.0.0-20250620022241-b7579e27df2b h1:KdrhdYPDUvJTvrDK9gdjfFd6JTk8vA1WJoldYSi0kHo= golang.org/x/exp/typeparams v0.0.0-20250620022241-b7579e27df2b/go.mod h1:LKZHyeOpPuZcMgxeHjJp4p5yvxrCX1xDvH10zYHhjjQ= -golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod 
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= ->>>>>>> master golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1275,21 +1263,12 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -<<<<<<< HEAD google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUEr4jDysRDLrm4PHePlge4v4TGAlxY= google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= -======= -google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw= -google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw= -google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda h1:b6F6WIV4xHHD0FA4oIyzU6mHWg2WI2X1RBehwa5QN38= 
-google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda/go.mod h1:AHcE/gZH76Bk/ROZhQphlRoWo5xKDEtz3eVEO1LfA8c= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 h1:AgADTJarZTBqgjiUzRgfaBchgYB3/WFTC80GPwsMcRI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= ->>>>>>> master google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 8e3df215aebef..2854c00ada6e9 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -33,6 +33,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/log" "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/zeropool" "go.uber.org/zap" "golang.org/x/sync/errgroup" @@ -729,7 +730,7 @@ func (pp *ParquetParser) Close() error { openedParser.Add(-1) }() - log.FromContext(context.Background()).Info("[parquet parser test] Close parquet parser") + logutil.Logger(context.Background()).Info("[parquet parser test] Close parquet parser") pp.resetReader() for _, r := range pp.readers { if err := r.Close(); err != nil { @@ -889,7 +890,7 @@ func NewParquetParser( if readerMemoryLimiter != nil { readerMemoryLimiter.Acquire(memoryUsage) } - log.FromContext(ctx).Info("Get memory usage of parquet reader", + logutil.Logger(ctx).Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), @@ -965,7 +966,7 @@ func 
NewParquetParser( colMetas: columnMetas, columnNames: columnNames, alloc: allocator, - logger: log.FromContext(ctx), + logger: log.Wrap(logutil.Logger(ctx)), memoryUsage: memoryUsage, memLimiter: readerMemoryLimiter, rowPool: &pool, @@ -1099,7 +1100,7 @@ func SampleStatisticsFromParquet( memoryUsageFull, err = estimateNonStreamMemory(ctx, fileMeta, store) } - log.FromContext(ctx).Info("Get memory usage of parquet reader", + logutil.Logger(ctx).Info("Get memory usage of parquet reader", zap.String("memory usage full", fmt.Sprintf("%d MB", memoryUsageFull>>20)), zap.String("memory usage stream", fmt.Sprintf("%d MB", memoryUsageStream>>20)), ) From f4fde5018ad45eca2213095fdff49c564fd35170 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 30 Jul 2025 16:27:27 +0800 Subject: [PATCH 84/93] Update go mod and remove non-streaming mode Signed-off-by: Ruihao Chen --- go.mod | 8 +- go.sum | 14 +- pkg/executor/importer/import.go | 7 +- pkg/lightning/mydump/allocator.go | 18 +- pkg/lightning/mydump/loader.go | 20 +- pkg/lightning/mydump/loader_test.go | 4 +- pkg/lightning/mydump/parquet_parser.go | 829 +++++------------- pkg/lightning/mydump/parquet_parser_test.go | 17 +- .../mydump/parquet_type_converter.go | 211 +++++ pkg/lightning/mydump/parquet_writer.go | 8 +- tools/gen-parquet/main.go | 8 +- 11 files changed, 452 insertions(+), 692 deletions(-) create mode 100644 pkg/lightning/mydump/parquet_type_converter.go diff --git a/go.mod b/go.mod index 0678315b1c079..6596d6d43b518 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/YangKeao/go-mysql-driver v0.0.0-20240627104025-dd5589458cfa github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 + github.com/apache/arrow-go/v18 v18.0.0-00010101000000-000000000000 github.com/apache/skywalking-eyes v0.4.0 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/ashanbrown/makezero v1.2.0 @@ -68,7 +69,6 @@ require ( github.com/jellydator/ttlcache/v3 v3.0.1 
github.com/jfcg/sorty/v2 v2.1.0 github.com/jingyugao/rowserrcheck v1.1.1 - github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df github.com/karamaru-alpha/copyloopvar v1.2.1 @@ -155,7 +155,6 @@ require ( require ( codeberg.org/chavacava/garif v0.2.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect - github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/andybalholm/brotli v1.1.1 // indirect github.com/cockroachdb/errors v1.11.3 // indirect github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce // indirect @@ -308,10 +307,10 @@ require ( golang.org/x/crypto v0.40.0 // indirect golang.org/x/exp/typeparams v0.0.0-20250620022241-b7579e27df2b // indirect golang.org/x/mod v0.26.0 // indirect - golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250425173222-7b384671a197 // indirect google.golang.org/protobuf v1.36.6 gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect @@ -326,6 +325,7 @@ require ( ) replace ( + github.com/apache/arrow-go/v18 => github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9 github.com/go-ldap/ldap/v3 => github.com/YangKeao/ldap/v3 v3.4.5-0.20230421065457-369a3bab1117 github.com/pingcap/tidb/pkg/parser => ./pkg/parser diff --git a/go.sum b/go.sum index b727754e8e2d0..f5a5f48e713cb 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,6 @@ github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= 
github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= -github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= -github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= @@ -467,8 +465,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0 h1:3Ec2rNvZT3b5HUlKi1aCCDu11sn7swFiqyjdpBrSe7c= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250305032250-07d568e83cc0/go.mod h1:3HV4S6TBzH1biWLoyhiUV7DQ2XUygpKwV9bHWFyG89E= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9 h1:Y5Y0kd8/BrtU/0H3RAET3fW+/wvEot8ClSqQV875F7c= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9/go.mod h1:sET3C7K44egtWGG38eMpqWr2HsvrtxRq9iLSE3dXrYw= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df 
h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= @@ -1200,8 +1198,8 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= -golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= @@ -1267,8 +1265,8 @@ google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUE google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250425173222-7b384671a197 
h1:29cjnHVylHwTzH66WfFZqgSQgnxzvWE+jvBwpZCLRxY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250425173222-7b384671a197/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index 3e666abe56440..9a30b22e360d2 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1293,8 +1293,8 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { - _, memoryUsageStream, memoryUsageFull, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) - memoryUsage, encodeThreadCnt, useStream := mydump.AdjustEncodeThreadCnt(memoryUsageStream, memoryUsageFull, e.Plan.ThreadCnt) + _, memoryUsage, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) + encodeThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) if err != nil { return errors.Trace(err) @@ -1302,8 +1302,7 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { for _, dataFile := range dataFiles { // To reduce the memory usage, we only use streaming mode to read file. 
dataFile.ParquetMeta = mydump.ParquetFileMeta{ - MemoryUsage: memoryUsage, - UseStreaming: useStream, + MemoryUsage: memoryUsage, } } diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index aae97be599bcc..4c771d32fa0a4 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -21,7 +21,7 @@ import ( "sync" "unsafe" - "github.com/joechenrh/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/pingcap/log" "github.com/pingcap/tidb/pkg/lightning/membuf" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" @@ -107,22 +107,14 @@ func ReleaseMemoryForParquet() { // AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. // It's used for IMPORT INTO. func AdjustEncodeThreadCnt( - memoryUsageStream, memoryUsageFull, threadCnt int, -) (memoryUsage, adjustCnt int, useStream bool) { + memoryUsage, threadCnt int, +) int { memTotal, err := tidbmemory.MemTotal() if err != nil { - return memoryUsage, threadCnt, true + return threadCnt } - streamThreadCnt := max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsageStream, threadCnt), 1) - fullThreadCnt := max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsageFull, threadCnt), 1) - - // TODO(joechenrh): use a more proper way to choose mode. - if streamThreadCnt == fullThreadCnt { - return memoryUsageFull, fullThreadCnt, false - } - - return memoryUsageStream, streamThreadCnt, true + return max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsage, threadCnt), 1) } func init() { diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index 8a45664bb7731..ea4dda4ee8c03 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -91,9 +91,8 @@ type MDTableMeta struct { // ParquetFileMeta contains some analyzed metadata for a parquet file by MyDumper Loader. 
type ParquetFileMeta struct { - Rows int64 // row count - MemoryUsage int // memory usage for reader - UseStreaming bool // whether use streaming mode + Rows int64 // row count + MemoryUsage int // memory usage for reader } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. @@ -281,8 +280,6 @@ type mdLoaderSetup struct { sampledParquetRowSizes sync.Map // sampled memory usage for streaming parquet read sampledParquetMemoryUsage sync.Map - // sampled memory usage for non-streaming parquet read - sampledParquetMemoryUsageFull sync.Map } // NewLoader constructs a MyDumper loader that scanns the data source and constructs a set of metadatas. @@ -486,7 +483,6 @@ func (s *mdLoaderSetup) setup(ctx context.Context) error { v, _ = s.sampledParquetMemoryUsage.Load(tableName) info.FileMeta.ParquetMeta.MemoryUsage, _ = v.(int) - info.FileMeta.ParquetMeta.UseStreaming = true } switch info.FileMeta.Type { @@ -621,17 +617,16 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, f RawFile) (*File info.FileMeta.RealSize = EstimateRealSizeForFile(ctx, info.FileMeta, s.loader.GetStore()) case SourceTypeParquet: var ( - totalRowCount int64 - rowSize float64 - memoryUsage int - memoryUsageFull int - tableName = info.TableName.String() + totalRowCount int64 + rowSize float64 + memoryUsage int + tableName = info.TableName.String() ) // Only sample once for each table _, loaded := s.sampledParquetRowSizes.LoadOrStore(tableName, 0) if !loaded { - rowSize, memoryUsage, memoryUsageFull, err = + rowSize, memoryUsage, err = SampleStatisticsFromParquet(ctx, info.FileMeta, s.loader.GetStore()) if err != nil { logger.Error("fail to sample parquet row size", zap.String("category", "loader"), @@ -641,7 +636,6 @@ func (s *mdLoaderSetup) constructFileInfo(ctx context.Context, f RawFile) (*File } s.sampledParquetRowSizes.Store(tableName, rowSize) s.sampledParquetMemoryUsage.Store(tableName, memoryUsage) - s.sampledParquetMemoryUsageFull.Store(tableName, 
memoryUsageFull) } totalRowCount, err = ReadParquetFileRowCountByFile(ctx, s.loader.GetStore(), info.FileMeta) diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 8acfdb24bca08..71cdf9fce641e 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -27,8 +27,8 @@ import ( "testing" "time" - "github.com/joechenrh/arrow-go/v18/parquet" - "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/schema" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/common" diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 2854c00ada6e9..a1d078e8c8bc7 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -18,16 +18,12 @@ import ( "context" "fmt" "io" - "math/big" - "reflect" "strings" - "sync/atomic" - "time" - "github.com/joechenrh/arrow-go/v18/arrow/memory" - "github.com/joechenrh/arrow-go/v18/parquet" - "github.com/joechenrh/arrow-go/v18/parquet/file" - "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/file" + "github.com/apache/arrow-go/v18/parquet/schema" "github.com/pingcap/errors" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" @@ -40,24 +36,50 @@ import ( ) const ( - // defaultBatchSize is the number of rows fetched each time in the parquet reader - defaultBatchSize = 128 - // defaultBufSize specifies the default size of skip buffer. // Skip buffer is used when reading data from the cloud. 
If there is a gap between the current // read position and the last read position, these data is stored in this buffer to avoid // potentially reopening the underlying file when the gap size is less than the buffer size. defaultBufSize = 64 * 1024 - - utcTimeLayout = "2006-01-02 15:04:05.999999Z" - timeLayout = "2006-01-02 15:04:05.999999" ) -var openedParser atomic.Int32 +func estimateRowSize(row []types.Datum) int { + length := 0 + for _, v := range row { + if v.IsNull() { + continue + } + if v.Kind() == types.KindString { + length += len(v.GetBytes()) + } else { + length += 8 + } + } + return length +} -// columnDumper is a helper struct to read data from one column. -type columnDumper struct { - reader file.ColumnChunkReader +type innerReader[T parquet.ColumnTypes] interface { + Type() parquet.Type + Descriptor() *schema.Column + + ReadBatchInPage(batchSize int64, values []T, defLvls, repLvls []int16) (int64, int, error) + HasNext() bool + + Close() error +} + +type columnDumper interface { + Type() parquet.Type + SetReader(colReader file.ColumnChunkReader) + + Next(*types.Datum) bool + ReadNextBatch() int + + Close() error +} + +type generalColumnDumper[T parquet.ColumnTypes, R innerReader[T]] struct { + reader R batchSize int64 valueOffset int valuesBuffered int @@ -66,119 +88,108 @@ type columnDumper struct { levelsBuffered int64 defLevels []int16 repLevels []int16 - values []any + values []T - valueBuffer any -} + closed bool -func createDumper(tp parquet.Type) *columnDumper { - batchSize := 128 + setter setter[T] +} - var valueBuffer any - switch tp { - case parquet.Types.Boolean: - valueBuffer = make([]bool, batchSize) - case parquet.Types.Int32: - valueBuffer = make([]int32, batchSize) - case parquet.Types.Int64: - valueBuffer = make([]int64, batchSize) - case parquet.Types.Float: - valueBuffer = make([]float32, batchSize) - case parquet.Types.Double: - valueBuffer = make([]float64, batchSize) - case parquet.Types.Int96: - valueBuffer = 
make([]parquet.Int96, batchSize) - case parquet.Types.ByteArray: - valueBuffer = make([]parquet.ByteArray, batchSize) - case parquet.Types.FixedLenByteArray: - valueBuffer = make([]parquet.FixedLenByteArray, batchSize) +// newGeneralColumnDumper creates a new generic column dumper +func newGeneralColumnDumper[T parquet.ColumnTypes, R innerReader[T]]( + batchSize int, getter setter[T], +) *generalColumnDumper[T, R] { + return &generalColumnDumper[T, R]{ + batchSize: int64(batchSize), + defLevels: make([]int16, batchSize), + repLevels: make([]int16, batchSize), + values: make([]T, batchSize), + setter: getter, } +} - return &columnDumper{ - batchSize: int64(batchSize), - defLevels: make([]int16, batchSize), - repLevels: make([]int16, batchSize), - values: make([]any, batchSize), - valueBuffer: valueBuffer, - } +func (dump *generalColumnDumper[T, R]) SetReader(colReader file.ColumnChunkReader) { + dump.reader, _ = colReader.(R) } -// Type returns the column type of this dumper -func (dump *columnDumper) Type() parquet.Type { +func (dump *generalColumnDumper[T, R]) Type() parquet.Type { return dump.reader.Type() } -// SetReader sets the reader -func (dump *columnDumper) SetReader(colReader file.ColumnChunkReader) { - dump.reader = colReader - dump.valueOffset = 0 - dump.levelOffset = 0 - dump.levelsBuffered = 0 - dump.valuesBuffered = 0 +func (dump *generalColumnDumper[T, R]) Close() error { + if dump.closed { + return nil + } + + err := dump.reader.Close() + dump.closed = true + return err } -func (dump *columnDumper) readNextBatch() int { - switch reader := dump.reader.(type) { - case *file.BooleanColumnChunkReader: - values, _ := dump.valueBuffer.([]bool) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.Int32ColumnChunkReader: - values, _ := dump.valueBuffer.([]int32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, 
dump.repLevels) - case *file.Int64ColumnChunkReader: - values, _ := dump.valueBuffer.([]int64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.Float32ColumnChunkReader: - values, _ := dump.valueBuffer.([]float32) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.Float64ColumnChunkReader: - values, _ := dump.valueBuffer.([]float64) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.Int96ColumnChunkReader: - values, _ := dump.valueBuffer.([]parquet.Int96) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.ByteArrayColumnChunkReader: - values, _ := dump.valueBuffer.([]parquet.ByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - case *file.FixedLenByteArrayColumnChunkReader: - values, _ := dump.valueBuffer.([]parquet.FixedLenByteArray) - dump.levelsBuffered, dump.valuesBuffered, _ = reader.ReadBatch(dump.batchSize, values, dump.defLevels, dump.repLevels) - } +func (dump *generalColumnDumper[T, R]) ReadNextBatch() int { + // ReadBatchInPage reads a batch of values from the current page. + // And the values returned may be shallow copies from the internal page buffer. 
+ //nolint: errcheck + dump.levelsBuffered, dump.valuesBuffered, _ = dump.reader.ReadBatchInPage( + dump.batchSize, + dump.values, + dump.defLevels, + dump.repLevels, + ) dump.valueOffset = 0 dump.levelOffset = 0 return int(dump.levelsBuffered) } -func (dump *columnDumper) hasNext() bool { - return dump.levelOffset < dump.levelsBuffered || dump.reader.HasNext() -} - -// Next reads next value from the reader -func (dump *columnDumper) Next() (any, bool) { +// Next reads the next value with proper level handling +func (dump *generalColumnDumper[T, R]) Next(d *types.Datum) bool { if dump.levelOffset == dump.levelsBuffered { - if !dump.hasNext() { - return nil, false + if !dump.reader.HasNext() { + return false } - dump.readNextBatch() + dump.ReadNextBatch() if dump.levelsBuffered == 0 { - return nil, false + return false } } - defLevel := dump.defLevels[int(dump.levelOffset)] - // repLevel := dump.repLevels[int(dump.levelOffset)] + // Check definition level for NULL handling + defLevel := dump.defLevels[dump.levelOffset] dump.levelOffset++ if defLevel < dump.reader.Descriptor().MaxDefinitionLevel() { - return nil, true + d.SetNull() + return true } - vb := reflect.ValueOf(dump.valueBuffer) - v := vb.Index(dump.valueOffset).Interface() + value := dump.values[dump.valueOffset] dump.valueOffset++ + dump.setter(value, d) + return true +} - return v, true +func createColumnDumper(tp parquet.Type, converted *convertedType, batchSize int) columnDumper { + switch tp { + case parquet.Types.Boolean: + return newGeneralColumnDumper[bool, *file.BooleanColumnChunkReader](batchSize, getBoolData) + case parquet.Types.Int32: + return newGeneralColumnDumper[int32, *file.Int32ColumnChunkReader](batchSize, getInt32Getter(converted)) + case parquet.Types.Int64: + return newGeneralColumnDumper[int64, *file.Int64ColumnChunkReader](batchSize, getInt64Getter(converted)) + case parquet.Types.Float: + return newGeneralColumnDumper[float32, *file.Float32ColumnChunkReader](batchSize, 
getFloat32Data) + case parquet.Types.Double: + return newGeneralColumnDumper[float64, *file.Float64ColumnChunkReader](batchSize, getFloat64Data) + case parquet.Types.Int96: + return newGeneralColumnDumper[parquet.Int96, *file.Int96ColumnChunkReader](batchSize, getInt96Data) + case parquet.Types.ByteArray: + return newGeneralColumnDumper[parquet.ByteArray, *file.ByteArrayColumnChunkReader](batchSize, getByteArrayGetter(converted)) + case parquet.Types.FixedLenByteArray: + return newGeneralColumnDumper[parquet.FixedLenByteArray, *file.FixedLenByteArrayColumnChunkReader](batchSize, getFixedLenByteArrayGetter(converted)) + default: + return nil + } } // convertedType is older representation of the logical type in parquet @@ -188,58 +199,6 @@ type convertedType struct { decimalMeta schema.DecimalMetadata } -func binaryToDecimalStr(rawBytes []byte, scale int) string { - negative := rawBytes[0] > 127 - if negative { - for i := range rawBytes { - rawBytes[i] = ^rawBytes[i] - } - for i := len(rawBytes) - 1; i >= 0; i-- { - rawBytes[i]++ - if rawBytes[i] != 0 { - break - } - } - } - - intValue := big.NewInt(0) - intValue = intValue.SetBytes(rawBytes) - val := fmt.Sprintf("%0*d", scale, intValue) - dotIndex := len(val) - scale - var res strings.Builder - if negative { - res.WriteByte('-') - } - if dotIndex == 0 { - res.WriteByte('0') - } else { - res.WriteString(val[:dotIndex]) - } - if scale > 0 { - res.WriteByte('.') - res.WriteString(val[dotIndex:]) - } - return res.String() -} - -func formatTime(v int64, unit string, format, utcFormat string, utc bool) string { - var t time.Time - switch unit { - case "MICROS": - t = time.UnixMicro(v) - case "MILLIS": - t = time.UnixMilli(v) - default: - t = time.Unix(0, v) - } - - t = t.UTC() - if utc { - return t.Format(utcFormat) - } - return t.Format(format) -} - // parquetFileWrapper is a wrapper for storage.ReadSeekCloser // It implements io.ReaderAt interface to read parquet file using arrow-go. 
type parquetFileWrapper struct { @@ -254,10 +213,6 @@ type parquetFileWrapper struct { path string } -func (pf *parquetFileWrapper) Init(bufSize int) { - pf.skipBuf = make([]byte, bufSize) -} - func (pf *parquetFileWrapper) readNBytes(p []byte) (int, error) { n, err := io.ReadFull(pf, p) if err != nil && err != io.EOF { @@ -319,8 +274,8 @@ func (pf *parquetFileWrapper) Open(name string) (parquet.ReaderAtSeeker, error) store: pf.store, ctx: pf.ctx, path: name, + skipBuf: make([]byte, defaultBufSize), } - newPf.Init(defaultBufSize) return newPf, nil } @@ -333,27 +288,18 @@ type ParquetParser struct { alloc memory.Allocator - dumpers []*columnDumper + dumpers []columnDumper - // rows stores the actual data after parsing. - // rows will be fetched from rowPool and reclaimed after recycle. - rows [][]types.Datum rowPool *zeropool.Pool[[]types.Datum] - // curIdx and avail is the current index and total number of rows in rows buffer - curIdx int - avail int - curRowGroup int totalRowGroup int - curRowInGroup int // number of rows read in current group - totalRowsInGroup int // total rows in current group - curRows int // number of rows read in total - totalRows int // total rows in this file - totalBytesRead int // total bytes read, estimated by all the read datum. - firstAfterReset bool - parallelRead bool + curRowInGroup int // number of rows read in current group + totalRowsInGroup int // total rows in current group + totalRows int // total rows in this file + totalRowsRead int64 // total rows read + totalBytesRead int // total bytes read, estimated by all the read datum. 
lastRow Row logger log.Logger @@ -362,156 +308,16 @@ type ParquetParser struct { memLimiter *membuf.Limiter } -func (pp *ParquetParser) setStringData(row, col int, val any) { - vba, _ := val.(parquet.ByteArray) - pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") -} - -func (pp *ParquetParser) setInt32Data(row, col int, val any) { - v32, _ := val.(int32) - pp.rows[row][col].SetInt64(int64(v32)) -} - -func (pp *ParquetParser) setUint32Data(row, col int, val any) { - v64, _ := val.(int64) - pp.rows[row][col].SetUint64(uint64(v64)) -} - -func (pp *ParquetParser) setInt64Data(row, col int, val any) { - v64, _ := val.(int64) - pp.rows[row][col].SetInt64(v64) -} - -func (pp *ParquetParser) setUint64Data(row, col int, val any) { - v64, _ := val.(int64) - pp.rows[row][col].SetUint64(uint64(v64)) -} - -func (pp *ParquetParser) setTimeMillisData(row, col int, val any) { - v32, _ := val.(int32) - timeStr := formatTime(int64(v32), "MILLIS", "15:04:05.999999", "15:04:05.999999Z", true) - pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") -} - -func (pp *ParquetParser) setTimeMicrosData(row, col int, val any) { - v64, _ := val.(int64) - timeStr := formatTime(v64, "MICROS", "15:04:05.999999", "15:04:05.999999Z", true) - pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") -} - -func (pp *ParquetParser) setTimestampMillisData(row, col int, val any) { - v64, _ := val.(int64) - timeStr := formatTime(v64, "MILLIS", timeLayout, utcTimeLayout, true) - pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") -} - -func (pp *ParquetParser) setTimestampMicrosData(row, col int, val any) { - v64, _ := val.(int64) - timeStr := formatTime(v64, "MICROS", timeLayout, utcTimeLayout, true) - pp.rows[row][col].SetString(timeStr, "utf8mb4_bin") -} - -func (pp *ParquetParser) setDateData(row, col int, val any) { - v32, _ := val.(int32) - dateStr := time.Unix(int64(v32)*86400, 0).Format(time.DateOnly) - pp.rows[row][col].SetString(dateStr, "utf8mb4_bin") -} - -func (pp *ParquetParser) 
setDecimalData(row, col int, val any) { - colTp := pp.dumpers[col].Type() - decimal := pp.colMetas[col].decimalMeta - - if colTp == parquet.Types.Int64 || colTp == parquet.Types.Int32 { - var v int64 - if colTp == parquet.Types.Int32 { - v32, _ := val.(int32) - v = int64(v32) - } else { - v, _ = val.(int64) - } - if !decimal.IsSet || decimal.Scale == 0 { - pp.rows[row][col].SetInt64(v) - return - } - minLen := decimal.Scale + 1 - if v < 0 { - minLen++ - } - val := fmt.Sprintf("%0*d", minLen, v) - dotIndex := len(val) - int(decimal.Scale) - pp.rows[row][col].SetString(val[:dotIndex]+"."+val[dotIndex:], "utf8mb4_bin") - } else if colTp == parquet.Types.FixedLenByteArray { - v, _ := val.(parquet.FixedLenByteArray) - s := binaryToDecimalStr(v, int(decimal.Scale)) - pp.rows[row][col].SetString(s, "utf8mb4_bin") - } else { - v, _ := val.(parquet.ByteArray) - s := binaryToDecimalStr(v, int(decimal.Scale)) - pp.rows[row][col].SetString(s, "utf8mb4_bin") - } -} - -func (pp *ParquetParser) setBoolData(row, col int, val any) { - boolVal, _ := val.(bool) - if boolVal { - pp.rows[row][col].SetUint64(1) - return - } - pp.rows[row][col].SetUint64(0) -} - -func (pp *ParquetParser) setFloat32Data(row, col int, val any) { - vf32, _ := val.(float32) - pp.rows[row][col].SetFloat32(vf32) -} - -func (pp *ParquetParser) setFloat64Data(row, col int, val any) { - vf64, _ := val.(float64) - pp.rows[row][col].SetFloat64(vf64) -} - -func (pp *ParquetParser) setFixedByteArrayData(row, col int, val any) { - vfa, _ := val.(parquet.FixedLenByteArray) - pp.rows[row][col].SetString(string(vfa), "utf8mb4_bin") -} - -func (pp *ParquetParser) setByteArrayData(row, col int, val any) { - vba, _ := val.(parquet.ByteArray) - pp.rows[row][col].SetString(string(vba), "utf8mb4_bin") -} - -func (pp *ParquetParser) setInt96Data(row, col int, val any) { - // FYI: 
https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 - // INT96 timestamp layout - // -------------------------- - // | 64 bit | 32 bit | - // --------------------------- - // | nano sec | julian day | - // --------------------------- - // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, - // where dt is a negative number but still legal in the context of Go. - // But it will cause errors or potential data inconsistency when importing. - v96, _ := val.(parquet.Int96) - pp.rows[row][col].SetString(v96.ToTime().Format(utcTimeLayout), "utf8mb4_bin") -} - // Init initializes the Parquet parser and allocate necessary buffers func (pp *ParquetParser) Init() error { meta := pp.readers[0].MetaData() - pp.curRowGroup, pp.totalRowGroup = -1, pp.readers[0].NumRowGroups() - - pp.totalRows = int(meta.NumRows) + pp.curRowGroup, pp.totalRowGroup, pp.totalRows = -1, pp.readers[0].NumRowGroups(), int(meta.NumRows) numCols := meta.Schema.NumColumns() - pp.rows = make([][]types.Datum, defaultBatchSize) - for i := range pp.rows { - pp.rows[i] = make([]types.Datum, numCols) - } - - pp.dumpers = make([]*columnDumper, numCols) + pp.dumpers = make([]columnDumper, numCols) for i := range numCols { - pp.dumpers[i] = createDumper(meta.Schema.Column(i).PhysicalType()) + pp.dumpers[i] = createColumnDumper(meta.Schema.Column(i).PhysicalType(), &pp.colMetas[i], 128) } return nil @@ -520,192 +326,65 @@ func (pp *ParquetParser) Init() error { // resetReader is used to reclaim the memory used by the column reader. func (pp *ParquetParser) resetReader() { for _, d := range pp.dumpers { - if d.reader != nil { - d.reader.Reset() - } + //nolint: errcheck + d.Close() } } // ReadRows read several rows internally and store them in the row buffer. 
-func (pp *ParquetParser) ReadRows(num int) (int, error) { - if num > defaultBatchSize { - return 0, errors.Errorf("Number of rows read larger than buffer size") - } - - readNum := min(num, pp.totalRows-pp.curRows) - if readNum == 0 { - return 0, nil - } - - for i := range readNum { - pp.rows[i] = pp.rowPool.Get() - } - - read := 0 - for read < readNum { - // Move to next row group - if pp.curRowInGroup == pp.totalRowsInGroup { - if pp.curRowGroup >= 0 { - pp.resetReader() - } - pp.curRowGroup++ - pp.firstAfterReset = true - for c := range len(pp.dumpers) { - rowGroupReader := pp.readers[c].RowGroup(pp.curRowGroup) - colReader, err := rowGroupReader.Column(c) - if err != nil { - return 0, errors.Trace(err) - } - pp.dumpers[c].SetReader(colReader) +func (pp *ParquetParser) readSingleRows(row []types.Datum) error { + // Move to next row group + if pp.curRowInGroup == pp.totalRowsInGroup { + if pp.curRowGroup >= 0 { + pp.resetReader() + } + pp.curRowGroup++ + for c := range len(pp.dumpers) { + rowGroup := pp.readers[c].RowGroup(pp.curRowGroup) + colReader, err := rowGroup.Column(c) + if err != nil { + return errors.Trace(err) } - pp.curRowInGroup, pp.totalRowsInGroup = 0, int(pp.readers[0].MetaData().RowGroups[pp.curRowGroup].NumRows) - } - - // Read in this group - curRead := min(readNum-read, pp.totalRowsInGroup-pp.curRowInGroup) - _, err := pp.readInGroup(curRead, read) - if err != nil { - return 0, errors.Trace(err) + pp.dumpers[c].SetReader(colReader) } - read += curRead - pp.curRowInGroup += curRead - } - - for i := range readNum { - pp.totalBytesRead += estimateRowSize(pp.rows[i]) + pp.curRowInGroup, pp.totalRowsInGroup = 0, int(pp.readers[0].MetaData().RowGroups[pp.curRowGroup].NumRows) } - pp.curRows += readNum - pp.curIdx, pp.avail = 0, readNum - return readNum, nil -} - -// readInGroup read severals rows in current row group. -// storeOffset represents the starting position for storing the read rows. -// It's a part of the ReadRows. 
-func (pp *ParquetParser) readInGroup(num, storeOffset int) (int, error) { - var ( - err error - total int - ) - - // After moving to the next row group, we need to read one dict page and - // at least one data page for each column. - // Since it's an I/O intensive operation, so we perform it in parallel. - // TODO(joechen): 4 is a experimental value and can be changed later. - if pp.firstAfterReset && pp.parallelRead { - pp.firstAfterReset = false - var eg errgroup.Group - eg.SetLimit(4) - for i := range len(pp.dumpers) { - dumper := pp.dumpers[i] - eg.Go(func() error { - dumper.readNextBatch() - return nil - }) - } - if err := eg.Wait(); err != nil { - return 0, err - } - } - - // Read data into buffers first + // Read in this group for col, dumper := range pp.dumpers { - meta := pp.colMetas[col] - physicalTp := dumper.Type() - - var setFunc func(row, col int, val any) - if physicalTp == parquet.Types.Boolean || physicalTp == parquet.Types.Int96 || meta.converted == schema.ConvertedTypes.None { - switch physicalTp { - case parquet.Types.Boolean: - setFunc = pp.setBoolData - case parquet.Types.Int32: - setFunc = pp.setInt32Data - case parquet.Types.Int64: - setFunc = pp.setInt64Data - case parquet.Types.Int96: - setFunc = pp.setInt96Data - case parquet.Types.Float: - setFunc = pp.setFloat32Data - case parquet.Types.Double: - setFunc = pp.setFloat64Data - case parquet.Types.ByteArray: - setFunc = pp.setByteArrayData - case parquet.Types.FixedLenByteArray: - setFunc = pp.setFixedByteArrayData - } - } else { - switch meta.converted { - case schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: - setFunc = pp.setStringData - case schema.ConvertedTypes.Int8, schema.ConvertedTypes.Int16, schema.ConvertedTypes.Int32: - setFunc = pp.setInt32Data - case schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Uint32: - setFunc = pp.setUint32Data - case schema.ConvertedTypes.Int64: - setFunc = 
pp.setInt64Data - case schema.ConvertedTypes.Uint64: - setFunc = pp.setUint64Data - case schema.ConvertedTypes.TimeMillis: - setFunc = pp.setTimeMillisData - case schema.ConvertedTypes.TimeMicros: - setFunc = pp.setTimeMicrosData - case schema.ConvertedTypes.TimestampMillis: - setFunc = pp.setTimestampMillisData - case schema.ConvertedTypes.TimestampMicros: - setFunc = pp.setTimestampMicrosData - case schema.ConvertedTypes.Date: - setFunc = pp.setDateData - case schema.ConvertedTypes.Decimal: - setFunc = pp.setDecimalData - } - } - - for i := range num { - val, ok := dumper.Next() - if !ok { - break - } - - if val == nil { - pp.rows[storeOffset+i][col].SetNull() - continue - } - setFunc(storeOffset+i, col, val) + if ok := dumper.Next(&row[col]); !ok { + return errors.New("error get data") } } - return total, err + pp.totalBytesRead += estimateRowSize(row) + pp.totalRowsRead++ + pp.curRowInGroup++ + return nil } // Pos returns the currently row number of the parquet file func (pp *ParquetParser) Pos() (pos int64, rowID int64) { - return int64(pp.curRows - pp.avail + pp.curIdx), pp.lastRow.RowID + return int64(pp.totalRowsRead), pp.lastRow.RowID } // SetPos implements the Parser interface. 
// For parquet file, this interface will read and discard the first `pos` rows, // and set the current row ID to `rowID` func (pp *ParquetParser) SetPos(pos int64, rowID int64) error { - curPos, _ := pp.Pos() - if pos < curPos { - return errors.Errorf("Parquet parset doesn't support seek back yet") - } + pp.lastRow.RowID = rowID - // Read and discard these rows - pos = min(pos, int64(pp.totalRows)) - for !(int(pos) >= pp.curRows-pp.avail && int(pos) < pp.curRows) { - numRead, err := pp.ReadRows(defaultBatchSize) - if err != nil { - return errors.Trace(err) - } - if numRead == 0 { - break + row := pp.rowPool.Get() + defer pp.rowPool.Put(row) + + // TODO(joechenrh): skip rows use underlying SkipRow interface + // For now it's ok, since only UTs use this interface + for range pos { + if err := pp.readSingleRows(row); err != nil { + return err } } - pp.curIdx = int(pos) - (pp.curRows - pp.avail) - pp.lastRow.RowID = rowID return nil } @@ -726,11 +405,9 @@ func (pp *ParquetParser) Close() error { if pp.memLimiter != nil { pp.memLimiter.Release(pp.memoryUsage) } - - openedParser.Add(-1) }() - logutil.Logger(context.Background()).Info("[parquet parser test] Close parquet parser") + pp.logger.Info("[parquet parser test] Close parquet parser") pp.resetReader() for _, r := range pp.readers { if err := r.Close(); err != nil { @@ -741,63 +418,27 @@ func (pp *ParquetParser) Close() error { return nil } -// GetRow get the the current row. -// Return error if we can't read next row. -// User should call ReadRow before calling this. -func (pp *ParquetParser) GetRow() ([]types.Datum, error) { - if pp.curIdx >= pp.avail { - read, err := pp.ReadRows(defaultBatchSize) - if err != nil { - return nil, errors.Trace(err) - } - if read == 0 { - return nil, nil - } - } - - row := pp.rows[pp.curIdx] - pp.curIdx++ - return row, nil -} - // ReadRow reads a row in the parquet file by the parser. // It implements the Parser interface. // Return io.EOF if reaching the end of the file. 
func (pp *ParquetParser) ReadRow() error { pp.lastRow.RowID++ pp.lastRow.Length = 0 - row, err := pp.GetRow() - if err != nil { - return errors.Trace(err) - } - if row == nil { - return io.EOF + + row := pp.rowPool.Get() + if err := pp.readSingleRows(row); err != nil { + pp.rowPool.Put(row) + return err } + pp.lastRow.Row = row - pp.lastRow.Length = 0 + pp.lastRow.Length = estimateRowSize(row) return nil } -func estimateRowSize(row []types.Datum) int { - length := 0 - for _, v := range row { - if v.IsNull() { - continue - } - if v.Kind() == types.KindString { - // use GetBytes to avoid memory allocation - length += len(v.GetBytes()) - } else { - length += 8 - } - } - return length -} - // LastRow gets the last row parsed by the parser. // It implements the Parser interface. func (pp *ParquetParser) LastRow() Row { - pp.lastRow.Length = estimateRowSize(pp.lastRow.Row) return pp.lastRow } @@ -845,8 +486,8 @@ func OpenParquetReader( store: store, ctx: ctx, path: path, + skipBuf: make([]byte, defaultBufSize), } - pf.Init(defaultBufSize) return pf, nil } @@ -872,8 +513,7 @@ func ReadParquetFileRowCountByFile( // GetDefaultParquetMeta return a default file meta func GetDefaultParquetMeta() ParquetFileMeta { return ParquetFileMeta{ - MemoryUsage: 0, - UseStreaming: true, + MemoryUsage: 0, } } @@ -890,14 +530,17 @@ func NewParquetParser( if readerMemoryLimiter != nil { readerMemoryLimiter.Acquire(memoryUsage) } - logutil.Logger(ctx).Info("Get memory usage of parquet reader", + + logger := log.Wrap(logutil.Logger(ctx)) + logger.Info("Get memory usage of parquet reader", zap.String("file", path), zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), - zap.Int32("opened parser", openedParser.Add(1)), - zap.Bool("streaming mode", meta.UseStreaming), ) + workerPool := &errgroup.Group{} + workerPool.SetLimit(8) + wrapper, ok := r.(*parquetFileWrapper) if !ok { wrapper = &parquetFileWrapper{ @@ 
-905,15 +548,15 @@ func NewParquetParser( store: store, ctx: ctx, path: path, + skipBuf: make([]byte, defaultBufSize), } - wrapper.Init(defaultBufSize) } allocator := GetAllocator() prop := parquet.NewReaderProperties(allocator) - prop.BufferedStreamEnabled = meta.UseStreaming + prop.BufferedStreamEnabled = true - reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop)) + reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop), file.WithWorkerPool(workerPool)) if err != nil { return nil, errors.Trace(err) } @@ -940,16 +583,17 @@ func NewParquetParser( subreaders = append(subreaders, reader) for i := 1; i < fileSchema.NumColumns(); i++ { var newWrapper parquet.ReaderAtSeeker - // If use streaming mode, we will open file for each column. - if meta.UseStreaming { - newWrapper, err = wrapper.Open("") - if err != nil { - return nil, errors.Trace(err) - } - } else { - newWrapper = wrapper + // Open file for each column. + newWrapper, err = wrapper.Open("") + if err != nil { + return nil, errors.Trace(err) } - reader, err := file.NewParquetReader(newWrapper, file.WithReadProps(prop), file.WithMetadata(reader.MetaData())) + + reader, err := file.NewParquetReader(newWrapper, + file.WithReadProps(prop), + file.WithMetadata(reader.MetaData()), + file.WithWorkerPool(workerPool), + ) if err != nil { return nil, errors.Trace(err) } @@ -962,15 +606,14 @@ func NewParquetParser( }) parser := &ParquetParser{ - readers: subreaders, - colMetas: columnMetas, - columnNames: columnNames, - alloc: allocator, - logger: log.Wrap(logutil.Logger(ctx)), - memoryUsage: memoryUsage, - memLimiter: readerMemoryLimiter, - rowPool: &pool, - parallelRead: !strings.HasPrefix(store.URI(), storage.LocalURIPrefix) && meta.UseStreaming, + readers: subreaders, + colMetas: columnMetas, + columnNames: columnNames, + alloc: allocator, + logger: logger, + memoryUsage: memoryUsage, + memLimiter: readerMemoryLimiter, + rowPool: &pool, } if err := parser.Init(); err != nil { return 
nil, errors.Trace(err) @@ -979,45 +622,6 @@ func NewParquetParser( return parser, nil } -func estimateNonStreamMemory( - ctx context.Context, - fileMeta SourceFileMeta, - store storage.ExternalStorage, -) (int, error) { - r, err := store.Open(ctx, fileMeta.Path, nil) - if err != nil { - return 0, err - } - - parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, ParquetFileMeta{ - MemoryUsage: 0, - UseStreaming: false, - }) - if err != nil { - return 0, err - } - - //nolint: errcheck - defer parser.Close() - - reader := parser.readers[0] - totalReadRows := reader.MetaData().RowGroups[0].NumRows - for range totalReadRows { - err = parser.ReadRow() - if err != nil { - if errors.Cause(err) == io.EOF { - break - } - return 0, err - } - lastRow := parser.LastRow() - parser.RecycleRow(lastRow) - } - - defaultAlloc, _ := parser.alloc.(*defaultAllocator) - return defaultAlloc.Allocated() + defaultArenaSize, nil -} - // SampleStatisticsFromParquet samples row size and memory usage of the parquet file. 
func SampleStatisticsFromParquet( ctx context.Context, @@ -1025,21 +629,17 @@ func SampleStatisticsFromParquet( store storage.ExternalStorage, ) ( avgRowSize float64, - memoryUsageStream int, - memoryUsageFull int, + memoryUsage int, err error, ) { r, err := store.Open(ctx, fileMeta.Path, nil) if err != nil { - return 0, 0, 0, err + return 0, 0, err } - parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, ParquetFileMeta{ - MemoryUsage: 0, - UseStreaming: true, - }) + parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, GetDefaultParquetMeta()) if err != nil { - return 0, 0, 0, err + return 0, 0, err } //nolint: errcheck @@ -1052,7 +652,7 @@ func SampleStatisticsFromParquet( reader := parser.readers[0] if reader.NumRowGroups() == 0 || reader.MetaData().RowGroups[0].NumRows == 0 { - return 0, 0, 0, nil + return 0, 0, nil } totalReadRows := reader.MetaData().RowGroups[0].NumRows @@ -1062,48 +662,23 @@ func SampleStatisticsFromParquet( if errors.Cause(err) == io.EOF { break } - return 0, 0, 0, err + return 0, 0, err } lastRow := parser.LastRow() - rowCount++ rowSize += int64(lastRow.Length) parser.RecycleRow(lastRow) + rowCount++ } avgRowSize = float64(rowSize) / float64(rowCount) alloc := parser.alloc defaultAlloc, _ := alloc.(*defaultAllocator) + memoryUsage = defaultAlloc.Allocated() + defaultArenaSize - // Here we add a defaultArenaSize to avoid differences in data between different files, as we only sample one file. - memoryUsageStream = defaultAlloc.Allocated() + defaultArenaSize - - pageBufferFull := 0 - memoryUsageFull = defaultAlloc.Allocated() - for _, rg := range parser.readers[0].MetaData().RowGroups { - totalUsage := 0 - for _, c := range rg.Columns { - bufSize := int(c.MetaData.GetTotalCompressedSize()) - // If single buffer size larger than arena size, non-stream mode will be disabled. 
- if bufSize > defaultArenaSize { - totalUsage = 32 << 30 - break - } - totalUsage += roundUp(bufSize, alignSize) - } - pageBufferFull = max(pageBufferFull, totalUsage) - } - - // Do some precheck, to prevent OOM during estimate memory usage due to large row group. - memoryUsageFull = roundUp(memoryUsageFull+pageBufferFull, defaultArenaSize) - if memoryUsageFull < (6 << 30) { - memoryUsageFull, err = estimateNonStreamMemory(ctx, fileMeta, store) - } - - logutil.Logger(ctx).Info("Get memory usage of parquet reader", - zap.String("memory usage full", fmt.Sprintf("%d MB", memoryUsageFull>>20)), - zap.String("memory usage stream", fmt.Sprintf("%d MB", memoryUsageStream>>20)), + parser.logger.Info("Get memory usage of parquet reader", + zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), ) - return avgRowSize, memoryUsageStream, memoryUsageFull, err + return avgRowSize, memoryUsage, err } diff --git a/pkg/lightning/mydump/parquet_parser_test.go b/pkg/lightning/mydump/parquet_parser_test.go index 823f39bc52c4e..212b54643b96a 100644 --- a/pkg/lightning/mydump/parquet_parser_test.go +++ b/pkg/lightning/mydump/parquet_parser_test.go @@ -21,8 +21,8 @@ import ( "testing" "time" - "github.com/joechenrh/arrow-go/v18/parquet" - "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/schema" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/types" "github.com/stretchr/testify/assert" @@ -215,11 +215,6 @@ func TestParquetVariousTypes(t *testing.T) { assert.Equal(t, row[i].GetString(), rowValue[i]) } - type TestDecimal struct { - Decimal1 int32 `parquet:"name=decimal1, type=INT32, convertedtype=DECIMAL, scale=3, precision=5"` - DecimalRef *int32 `parquet:"name=decimal2, type=INT32, convertedtype=DECIMAL, scale=3, precision=5"` - } - pc = []ParquetColumn{ { Name: "decimal1", @@ -281,10 +276,6 @@ func TestParquetVariousTypes(t *testing.T) { } } - type TestBool struct { - 
BoolVal bool `parquet:"name=bool_val, type=BOOLEAN"` - } - pc = []ParquetColumn{ { Name: "bool_val", @@ -396,8 +387,8 @@ func TestHiveParquetParser(t *testing.T) { require.NoError(t, err) lastRow := reader.LastRow() require.Equal(t, 2, len(lastRow.Row)) - require.Equal(t, types.KindString, lastRow.Row[1].Kind()) - ts, err := time.Parse(utcTimeLayout, lastRow.Row[1].GetString()) + require.Equal(t, types.KindMysqlTime, lastRow.Row[1].Kind()) + ts, err := lastRow.Row[1].GetMysqlTime().GoTime(time.UTC) require.NoError(t, err) require.Equal(t, results[i], ts) } diff --git a/pkg/lightning/mydump/parquet_type_converter.go b/pkg/lightning/mydump/parquet_type_converter.go new file mode 100644 index 0000000000000..073f86df849c1 --- /dev/null +++ b/pkg/lightning/mydump/parquet_type_converter.go @@ -0,0 +1,211 @@ +// Copyright 2025 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mydump + +import ( + "fmt" + "math/big" + "strings" + "time" + + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/schema" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/types" +) + +type setter[T parquet.ColumnTypes] func(T, *types.Datum) + +func binaryToDecimalStr(rawBytes []byte, scale int) string { + negative := rawBytes[0] > 127 + if negative { + for i := range rawBytes { + rawBytes[i] = ^rawBytes[i] + } + for i := len(rawBytes) - 1; i >= 0; i-- { + rawBytes[i]++ + if rawBytes[i] != 0 { + break + } + } + } + + intValue := big.NewInt(0) + intValue = intValue.SetBytes(rawBytes) + val := fmt.Sprintf("%0*d", scale, intValue) + dotIndex := len(val) - scale + var res strings.Builder + if negative { + res.WriteByte('-') + } + if dotIndex == 0 { + res.WriteByte('0') + } else { + res.WriteString(val[:dotIndex]) + } + if scale > 0 { + res.WriteByte('.') + res.WriteString(val[dotIndex:]) + } + return res.String() +} + +func getBoolData(val bool, d *types.Datum) { + if val { + d.SetUint64(1) + } else { + d.SetUint64(0) + } +} + +func getDecimalFromIntImpl(val int64, d *types.Datum, converted *convertedType) { + decimal := converted.decimalMeta + if !decimal.IsSet || decimal.Scale == 0 { + d.SetInt64(val) + } + + minLen := decimal.Scale + 1 + if val < 0 { + minLen++ + } + v := fmt.Sprintf("%0*d", minLen, val) + dotIndex := len(v) - int(decimal.Scale) + d.SetString(v[:dotIndex]+"."+v[dotIndex:], "utf8mb4_bin") +} + +func getInt32Getter(converted *convertedType) setter[int32] { + switch converted.converted { + case schema.ConvertedTypes.Decimal: + return func(val int32, d *types.Datum) { + getDecimalFromIntImpl(int64(val), d, converted) + } + case schema.ConvertedTypes.Date: + return func(val int32, d *types.Datum) { + // Convert days since Unix epoch to time.Time + t := time.Unix(int64(val)*86400, 0).UTC() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeDate, 0) + d.SetMysqlTime(mysqlTime) 
+ } + case schema.ConvertedTypes.TimeMillis: + return func(val int32, d *types.Datum) { + // Convert milliseconds to time.Time + t := time.UnixMilli(int64(val)).UTC() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) + d.SetMysqlTime(mysqlTime) + } + case schema.ConvertedTypes.Int32, schema.ConvertedTypes.None: + return func(val int32, d *types.Datum) { + d.SetInt64(int64(val)) + } + } + + return nil +} + +func getInt64Getter(converted *convertedType) setter[int64] { + switch converted.converted { + case schema.ConvertedTypes.Uint32, schema.ConvertedTypes.Uint64: + return func(val int64, d *types.Datum) { + d.SetUint64(uint64(val)) + } + case schema.ConvertedTypes.None, schema.ConvertedTypes.Int64: + return func(val int64, d *types.Datum) { + d.SetInt64(int64(val)) + } + case schema.ConvertedTypes.TimeMicros: + return func(val int64, d *types.Datum) { + // Convert microseconds to time.Time + t := time.UnixMicro(val).UTC() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) + d.SetMysqlTime(mysqlTime) + } + case schema.ConvertedTypes.TimestampMillis: + return func(val int64, d *types.Datum) { + // Convert milliseconds to time.Time + t := time.UnixMilli(val).UTC() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) + d.SetMysqlTime(mysqlTime) + } + case schema.ConvertedTypes.TimestampMicros: + return func(val int64, d *types.Datum) { + // Convert microseconds to time.Time + t := time.UnixMicro(val).UTC() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) + d.SetMysqlTime(mysqlTime) + } + case schema.ConvertedTypes.Decimal: + return func(val int64, d *types.Datum) { + getDecimalFromIntImpl(val, d, converted) + } + } + + return nil +} + +func getInt96Data(val parquet.Int96, d *types.Datum) { + // FYI: 
https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 + // INT96 timestamp layout + // -------------------------- + // | 64 bit | 32 bit | + // --------------------------- + // | nano sec | julian day | + // --------------------------- + // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, + // where dt is a negative number but still legal in the context of Go. + // But it will cause errors or potential data inconsistency when importing. + t := val.ToTime() + mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) + d.SetMysqlTime(mysqlTime) +} + +func getFloat32Data(val float32, d *types.Datum) { + d.SetFloat32(val) +} + +func getFloat64Data(val float64, d *types.Datum) { + d.SetFloat64(val) +} + +func getByteArrayGetter(converted *convertedType) setter[parquet.ByteArray] { + switch converted.converted { + case schema.ConvertedTypes.None, schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: + return func(val parquet.ByteArray, d *types.Datum) { + d.SetString(string(val), "utf8mb4_bin") + } + case schema.ConvertedTypes.Decimal: + return func(val parquet.ByteArray, d *types.Datum) { + str := binaryToDecimalStr(val, int(converted.decimalMeta.Scale)) + d.SetString(str, "utf8mb4_bin") + } + } + + return nil +} + +func getFixedLenByteArrayGetter(converted *convertedType) setter[parquet.FixedLenByteArray] { + switch converted.converted { + case schema.ConvertedTypes.None, schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: + return func(val parquet.FixedLenByteArray, d *types.Datum) { + d.SetString(string(val), "utf8mb4_bin") + } + case schema.ConvertedTypes.Decimal: + return func(val parquet.FixedLenByteArray, d *types.Datum) { + str := binaryToDecimalStr(val, 
int(converted.decimalMeta.Scale)) + d.SetString(str, "utf8mb4_bin") + } + } + + return nil +} diff --git a/pkg/lightning/mydump/parquet_writer.go b/pkg/lightning/mydump/parquet_writer.go index 20b51bdfbabaa..b28cb71ec6a69 100644 --- a/pkg/lightning/mydump/parquet_writer.go +++ b/pkg/lightning/mydump/parquet_writer.go @@ -18,10 +18,10 @@ import ( "context" "fmt" - "github.com/joechenrh/arrow-go/v18/parquet" - "github.com/joechenrh/arrow-go/v18/parquet/compress" - "github.com/joechenrh/arrow-go/v18/parquet/file" - "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/compress" + "github.com/apache/arrow-go/v18/parquet/file" + "github.com/apache/arrow-go/v18/parquet/schema" "github.com/pingcap/tidb/br/pkg/storage" ) diff --git a/tools/gen-parquet/main.go b/tools/gen-parquet/main.go index 54bf8bdfacf7d..f7ae34aecb8d2 100644 --- a/tools/gen-parquet/main.go +++ b/tools/gen-parquet/main.go @@ -22,10 +22,10 @@ import ( "os" "strconv" - "github.com/joechenrh/arrow-go/v18/parquet" - "github.com/joechenrh/arrow-go/v18/parquet/compress" - "github.com/joechenrh/arrow-go/v18/parquet/file" - "github.com/joechenrh/arrow-go/v18/parquet/schema" + "github.com/apache/arrow-go/v18/parquet" + "github.com/apache/arrow-go/v18/parquet/compress" + "github.com/apache/arrow-go/v18/parquet/file" + "github.com/apache/arrow-go/v18/parquet/schema" ) type writeWrapper struct { From 1a22effb9f9b4d4d2a8b0b337d6ba157b05a10ac Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 1 Sep 2025 03:12:07 -0400 Subject: [PATCH 85/93] update timestamp Signed-off-by: Ruihao Chen --- pkg/executor/importer/import.go | 8 ++++++- pkg/executor/importer/table_import.go | 3 +++ pkg/lightning/mydump/loader.go | 2 ++ pkg/lightning/mydump/parquet_parser.go | 20 ++++++++++------ .../mydump/parquet_type_converter.go | 24 ++++++++++++------- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/pkg/executor/importer/import.go 
b/pkg/executor/importer/import.go index 9a30b22e360d2..ee89f9ef30fe4 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -26,6 +26,7 @@ import ( "slices" "strings" "sync" + "time" "unicode/utf8" "github.com/pingcap/errors" @@ -244,7 +245,11 @@ type Plan struct { // ref https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-column-assignments Restrictive bool - SQLMode mysql.SQLMode + // Location is used to convert time type for parquet, as we assume that time stored + // in parquet is always adjusted to UTC, see + // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp + Location *time.Location + SQLMode mysql.SQLMode // Charset is the charset of the data file when file is CSV or TSV. // it might be nil when using LOAD DATA and no charset is specified. // for IMPORT INTO, it is always non-nil and default to be defaultCharacterSet. @@ -471,6 +476,7 @@ func NewImportPlan(ctx context.Context, userSctx sessionctx.Context, plan *plann FieldNullDef: defaultFieldNullDef, LineFieldsInfo: lineFieldsInfo, + Location: userSctx.GetSessionVars().Location(), SQLMode: userSctx.GetSessionVars().SQLMode, ImportantSysVars: getImportantSysVars(userSctx), diff --git a/pkg/executor/importer/table_import.go b/pkg/executor/importer/table_import.go index b4c949698c84a..a4e1e9197ecaf 100644 --- a/pkg/executor/importer/table_import.go +++ b/pkg/executor/importer/table_import.go @@ -303,6 +303,9 @@ func (ti *TableImporter) getParser(ctx context.Context, chunk *checkpoints.Chunk }, Remote: &chunk.FileMeta, } + + info.Remote.ParquetMeta.Loc = ti.Location + parser, err := ti.LoadDataController.GetParser(ctx, info) if err != nil { return nil, err diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index ea4dda4ee8c03..cec2863d6d919 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -21,6 +21,7 @@ import ( "sort" "strings" "sync" + "time" "github.com/pingcap/errors" 
"github.com/pingcap/failpoint" @@ -93,6 +94,7 @@ type MDTableMeta struct { type ParquetFileMeta struct { Rows int64 // row count MemoryUsage int // memory usage for reader + Loc *time.Location } // SourceFileMeta contains some analyzed metadata for a source file by MyDumper Loader. diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index a1d078e8c8bc7..8bc32ec44f8d7 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -19,6 +19,7 @@ import ( "fmt" "io" "strings" + "time" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/parquet" @@ -30,6 +31,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/logutil" + "github.com/pingcap/tidb/pkg/util/timeutil" "github.com/pingcap/tidb/pkg/util/zeropool" "go.uber.org/zap" "golang.org/x/sync/errgroup" @@ -169,20 +171,20 @@ func (dump *generalColumnDumper[T, R]) Next(d *types.Datum) bool { return true } -func createColumnDumper(tp parquet.Type, converted *convertedType, batchSize int) columnDumper { +func createColumnDumper(tp parquet.Type, converted *convertedType, loc *time.Location, batchSize int) columnDumper { switch tp { case parquet.Types.Boolean: return newGeneralColumnDumper[bool, *file.BooleanColumnChunkReader](batchSize, getBoolData) case parquet.Types.Int32: - return newGeneralColumnDumper[int32, *file.Int32ColumnChunkReader](batchSize, getInt32Getter(converted)) + return newGeneralColumnDumper[int32, *file.Int32ColumnChunkReader](batchSize, getInt32Getter(converted, loc)) case parquet.Types.Int64: - return newGeneralColumnDumper[int64, *file.Int64ColumnChunkReader](batchSize, getInt64Getter(converted)) + return newGeneralColumnDumper[int64, *file.Int64ColumnChunkReader](batchSize, getInt64Getter(converted, loc)) case parquet.Types.Float: return newGeneralColumnDumper[float32, *file.Float32ColumnChunkReader](batchSize, 
getFloat32Data) case parquet.Types.Double: return newGeneralColumnDumper[float64, *file.Float64ColumnChunkReader](batchSize, getFloat64Data) case parquet.Types.Int96: - return newGeneralColumnDumper[parquet.Int96, *file.Int96ColumnChunkReader](batchSize, getInt96Data) + return newGeneralColumnDumper[parquet.Int96, *file.Int96ColumnChunkReader](batchSize, getInt96Getter(converted, loc)) case parquet.Types.ByteArray: return newGeneralColumnDumper[parquet.ByteArray, *file.ByteArrayColumnChunkReader](batchSize, getByteArrayGetter(converted)) case parquet.Types.FixedLenByteArray: @@ -309,15 +311,19 @@ type ParquetParser struct { } // Init initializes the Parquet parser and allocate necessary buffers -func (pp *ParquetParser) Init() error { +func (pp *ParquetParser) Init(loc *time.Location) error { meta := pp.readers[0].MetaData() pp.curRowGroup, pp.totalRowGroup, pp.totalRows = -1, pp.readers[0].NumRowGroups(), int(meta.NumRows) numCols := meta.Schema.NumColumns() pp.dumpers = make([]columnDumper, numCols) + + if loc == nil { + loc = timeutil.SystemLocation() + } for i := range numCols { - pp.dumpers[i] = createColumnDumper(meta.Schema.Column(i).PhysicalType(), &pp.colMetas[i], 128) + pp.dumpers[i] = createColumnDumper(meta.Schema.Column(i).PhysicalType(), &pp.colMetas[i], loc, 128) } return nil @@ -615,7 +621,7 @@ func NewParquetParser( memLimiter: readerMemoryLimiter, rowPool: &pool, } - if err := parser.Init(); err != nil { + if err := parser.Init(meta.Loc); err != nil { return nil, errors.Trace(err) } diff --git a/pkg/lightning/mydump/parquet_type_converter.go b/pkg/lightning/mydump/parquet_type_converter.go index 073f86df849c1..e6fb3ef71e13a 100644 --- a/pkg/lightning/mydump/parquet_type_converter.go +++ b/pkg/lightning/mydump/parquet_type_converter.go @@ -85,7 +85,7 @@ func getDecimalFromIntImpl(val int64, d *types.Datum, converted *convertedType) d.SetString(v[:dotIndex]+"."+v[dotIndex:], "utf8mb4_bin") } -func getInt32Getter(converted *convertedType) 
setter[int32] { +func getInt32Getter(converted *convertedType, loc *time.Location) setter[int32] { switch converted.converted { case schema.ConvertedTypes.Decimal: return func(val int32, d *types.Datum) { @@ -94,14 +94,14 @@ func getInt32Getter(converted *convertedType) setter[int32] { case schema.ConvertedTypes.Date: return func(val int32, d *types.Datum) { // Convert days since Unix epoch to time.Time - t := time.Unix(int64(val)*86400, 0).UTC() + t := time.Unix(int64(val)*86400, 0).In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeDate, 0) d.SetMysqlTime(mysqlTime) } case schema.ConvertedTypes.TimeMillis: return func(val int32, d *types.Datum) { // Convert milliseconds to time.Time - t := time.UnixMilli(int64(val)).UTC() + t := time.UnixMilli(int64(val)).In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } @@ -114,7 +114,7 @@ func getInt32Getter(converted *convertedType) setter[int32] { return nil } -func getInt64Getter(converted *convertedType) setter[int64] { +func getInt64Getter(converted *convertedType, loc *time.Location) setter[int64] { switch converted.converted { case schema.ConvertedTypes.Uint32, schema.ConvertedTypes.Uint64: return func(val int64, d *types.Datum) { @@ -127,21 +127,21 @@ func getInt64Getter(converted *convertedType) setter[int64] { case schema.ConvertedTypes.TimeMicros: return func(val int64, d *types.Datum) { // Convert microseconds to time.Time - t := time.UnixMicro(val).UTC() + t := time.UnixMicro(val).In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } case schema.ConvertedTypes.TimestampMillis: return func(val int64, d *types.Datum) { // Convert milliseconds to time.Time - t := time.UnixMilli(val).UTC() + t := time.UnixMilli(val).In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } case schema.ConvertedTypes.TimestampMicros: return func(val int64, d 
*types.Datum) { // Convert microseconds to time.Time - t := time.UnixMicro(val).UTC() + t := time.UnixMicro(val).In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } @@ -154,7 +154,7 @@ func getInt64Getter(converted *convertedType) setter[int64] { return nil } -func getInt96Data(val parquet.Int96, d *types.Datum) { +func getInt96Data(val parquet.Int96, d *types.Datum, loc *time.Location) { // FYI: https://github.com/apache/spark/blob/d66a4e82eceb89a274edeb22c2fb4384bed5078b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L171-L178 // INT96 timestamp layout // -------------------------- @@ -165,11 +165,17 @@ func getInt96Data(val parquet.Int96, d *types.Datum) { // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, // where dt is a negative number but still legal in the context of Go. // But it will cause errors or potential data inconsistency when importing. 
- t := val.ToTime() + t := val.ToTime().In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } +func getInt96Getter(_ *convertedType, loc *time.Location) setter[parquet.Int96] { + return func(val parquet.Int96, d *types.Datum) { + getInt96Data(val, d, loc) + } +} + func getFloat32Data(val float32, d *types.Datum) { d.SetFloat32(val) } From fad328d269f50e4e43b3a7f2bb9e9900c36fbc6b Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 2 Sep 2025 04:50:03 -0400 Subject: [PATCH 86/93] minor update Signed-off-by: Ruihao Chen --- pkg/executor/importer/chunk_process.go | 54 ++++++++------ pkg/executor/importer/kv_encode.go | 71 +++++++++---------- pkg/lightning/backend/kv/base.go | 62 ++++++++++------ pkg/lightning/config/config.go | 5 -- pkg/lightning/mydump/csv_parser.go | 6 ++ pkg/lightning/mydump/parquet_parser.go | 16 ++++- .../mydump/parquet_type_converter.go | 4 +- pkg/lightning/mydump/parser.go | 12 ++++ pkg/table/column.go | 35 +++++---- 9 files changed, 163 insertions(+), 102 deletions(-) diff --git a/pkg/executor/importer/chunk_process.go b/pkg/executor/importer/chunk_process.go index e1ae52b7b6067..bdbdb163d8a2a 100644 --- a/pkg/executor/importer/chunk_process.go +++ b/pkg/executor/importer/chunk_process.go @@ -69,7 +69,7 @@ func parserEncodeReader(parser mydump.Parser, endOffset int64, filename string) return } - err = parser.ReadRow() + err = parser.ReadRowUnsafe() // todo: we can implement a ScannedPos which don't return error, will change it later. 
currOffset, _ := parser.ScannedPos() switch errors.Cause(err) { @@ -179,7 +179,6 @@ func (b *encodedKVGroupBatch) add(kvs *kv.Pairs) error { for _, pair := range kvs.Pairs { if tablecodec.IsRecordKey(pair.Key) { b.dataKVs = append(b.dataKVs, pair) - b.groupChecksum.UpdateOneDataKV(pair) } else { indexID, err := tablecodec.DecodeIndexID(pair.Key) if err != nil { @@ -189,7 +188,6 @@ func (b *encodedKVGroupBatch) add(kvs *kv.Pairs) error { b.indexKVs[indexID] = make([]common.KvPair, 0, cap(b.dataKVs)) } b.indexKVs[indexID] = append(b.indexKVs[indexID], pair) - b.groupChecksum.UpdateOneIndexKV(indexID, pair) } } @@ -201,6 +199,18 @@ func (b *encodedKVGroupBatch) add(kvs *kv.Pairs) error { return nil } +func (b *encodedKVGroupBatch) updateChecksum() error { + for _, pair := range b.dataKVs { + b.groupChecksum.UpdateOneDataKV(pair) + } + for indexID, kvs := range b.indexKVs { + for _, pair := range kvs { + b.groupChecksum.UpdateOneIndexKV(indexID, pair) + } + } + return nil +} + // chunkEncoder encodes data from readFn and sends encoded data to sendFn. type chunkEncoder struct { readFn encodeReaderFn @@ -216,8 +226,6 @@ type chunkEncoder struct { // total duration takes by read/encode. 
readTotalDur time.Duration encodeTotalDur time.Duration - - groupChecksum *verify.KVGroupChecksum } func newChunkEncoder( @@ -231,15 +239,14 @@ func newChunkEncoder( keyspace []byte, ) *chunkEncoder { return &chunkEncoder{ - chunkName: chunkName, - readFn: readFn, - offset: offset, - sendFn: sendFn, - collector: collector, - logger: logger, - encoder: encoder, - keyspace: keyspace, - groupChecksum: verify.NewKVGroupChecksumWithKeyspace(keyspace), + chunkName: chunkName, + readFn: readFn, + offset: offset, + sendFn: sendFn, + collector: collector, + logger: logger, + encoder: encoder, + keyspace: keyspace, } } @@ -291,8 +298,6 @@ func (p *chunkEncoder) encodeLoop(ctx context.Context) error { } } - p.groupChecksum.Add(kvGroupBatch.groupChecksum) - if err := p.sendFn(ctx, kvGroupBatch); err != nil { return err } @@ -301,9 +306,12 @@ func (p *chunkEncoder) encodeLoop(ctx context.Context) error { p.collector.Add(delta, int64(rowCount)) } + avgRowSize := delta / int64(rowCount) + rowBatchSize := MinDeliverBytes * 3 / 2 / uint64(avgRowSize) + // the ownership of rowBatch is transferred to the receiver of sendFn, we should // not touch it anymore. 
- rowBatch = make([]*kv.Pairs, 0, MinDeliverRowCnt) + rowBatch = make([]*kv.Pairs, 0, rowBatchSize) rowBatchByteSize = 0 rowCount = 0 readDur = 0 @@ -351,11 +359,9 @@ func (p *chunkEncoder) encodeLoop(ctx context.Context) error { } func (p *chunkEncoder) summaryFields() []zap.Field { - mergedChecksum := p.groupChecksum.MergedChecksum() return []zap.Field{ zap.Duration("readDur", p.readTotalDur), zap.Duration("encodeDur", p.encodeTotalDur), - zap.Object("checksum", &mergedChecksum), } } @@ -396,7 +402,7 @@ func (p *baseChunkProcessor) Process(ctx context.Context) (err error) { err2 := group.Wait() // in some unit tests it's nil if c := p.groupChecksum; c != nil { - c.Add(p.enc.groupChecksum) + c.Add(p.deliver.groupChecksum) } return err2 } @@ -422,6 +428,7 @@ func NewFileChunkProcessor( kvBatch: make(chan *encodedKVGroupBatch, maxKVQueueSize), dataWriter: dataWriter, indexWriter: indexWriter, + groupChecksum: verify.NewKVGroupChecksumWithKeyspace(keyspace), } return &baseChunkProcessor{ sourceType: DataSourceTypeFile, @@ -449,6 +456,8 @@ type dataDeliver struct { indexWriter backend.EngineWriter deliverTotalDur time.Duration + + groupChecksum *verify.KVGroupChecksum } func (p *dataDeliver) encodeDone() { @@ -495,6 +504,9 @@ func (p *dataDeliver) deliverLoop(ctx context.Context) error { return ctx.Err() } + kvBatch.updateChecksum() + p.groupChecksum.Add(kvBatch.groupChecksum) + err := func() error { p.diskQuotaLock.RLock() defer p.diskQuotaLock.RUnlock() @@ -537,7 +549,9 @@ func (p *dataDeliver) deliverLoop(ctx context.Context) error { } func (p *dataDeliver) summaryFields() []zap.Field { + mergedChecksum := p.groupChecksum.MergedChecksum() return []zap.Field{ + zap.Object("checksum", &mergedChecksum), zap.Duration("deliverDur", p.deliverTotalDur), } } diff --git a/pkg/executor/importer/kv_encode.go b/pkg/executor/importer/kv_encode.go index f7f0bc62e36a9..6b90dbf8da931 100644 --- a/pkg/executor/importer/kv_encode.go +++ b/pkg/executor/importer/kv_encode.go @@ 
-158,64 +158,63 @@ func (en *TableKVEncoder) parserData2TableData(parserData []types.Datum, rowID i return newRow, nil } -// getRow gets the row which from `insert into select from` or `load data`. -// The input values from these two statements are datums instead of -// expressions which are used in `insert into set x=y`. -// copied from InsertValues -func (en *TableKVEncoder) getRow(vals []types.Datum, rowID int64) ([]types.Datum, error) { +func (en *TableKVEncoder) resetRowCache() { rowLen := len(en.Columns) if cap(en.rowCache) < rowLen || cap(en.hasValueCache) < rowLen { en.rowCache = make([]types.Datum, rowLen) en.hasValueCache = make([]bool, rowLen) } else { - en.rowCache = en.rowCache[:0] - en.hasValueCache = en.hasValueCache[:0] - for range rowLen { - en.rowCache = append(en.rowCache, types.Datum{}) - en.hasValueCache = append(en.hasValueCache, false) + en.rowCache = en.rowCache[:rowLen] + en.hasValueCache = en.hasValueCache[:rowLen] + for i := range rowLen { + en.rowCache[i].SetNull() + en.hasValueCache[i] = false } } +} + +// getRow gets the row which from `insert into select from` or `load data`. +// The input values from these two statements are datums instead of +// expressions which are used in `insert into set x=y`. 
+// copied from InsertValues +func (en *TableKVEncoder) getRow(vals []types.Datum, rowID int64) ([]types.Datum, error) { + en.resetRowCache() row := en.rowCache hasValue := en.hasValueCache - for i := range en.insertColumns { - casted, err := table.CastColumnValue(en.SessionCtx.GetExprCtx(), vals[i], en.insertColumns[i].ToInfo(), false, false) + for i, col := range en.insertColumns { + offset := col.Offset + casted, err := table.CastColumnValue(en.SessionCtx.GetExprCtx(), vals[i], col.ToInfo(), false, false) if err != nil { - return nil, err + return nil, en.LogKVConvertFailed(row, offset, col.ToInfo(), err) } - offset := en.insertColumns[i].Offset row[offset] = casted hasValue[offset] = true } - - return en.fillRow(row, hasValue, rowID) -} - -func (en *TableKVEncoder) fillRow(row []types.Datum, hasValue []bool, rowID int64) ([]types.Datum, error) { - var value types.Datum - var err error - - record := en.GetOrCreateRecord() + // fill value for missing columns and handle bad null value for i, col := range en.Columns { - var theDatum *types.Datum - doCast := true - if hasValue[i] { - theDatum = &row[i] - doCast = false + isBadNullValue := false + if hasValue[i] && col.CheckNotNull(&row[i], 0) != nil { + isBadNullValue = true } - value, err = en.ProcessColDatum(col, rowID, theDatum, doCast) - if err != nil { - return nil, en.LogKVConvertFailed(row, i, col.ToInfo(), err) + + if !hasValue[i] || isBadNullValue { + value, err := en.HandleSpecialValue(col, rowID, isBadNullValue) + if err != nil { + return nil, en.LogKVConvertFailed(row, i, col.ToInfo(), err) + } + row[i] = value } - record = append(record, value) + if err := en.RebaseAutoID(col, rowID, &row[i]); err != nil { + return nil, en.LogKVConvertFailed(row, i, col.ToInfo(), err) + } } if common.TableHasAutoRowID(en.TableMeta()) { rowValue := rowID newRowID := en.AutoIDFn(rowID) - value = types.NewIntDatum(newRowID) - record = append(record, value) + row = append(row, types.NewIntDatum(newRowID)) alloc := 
en.TableAllocators().Get(autoid.RowIDAllocType) if err := alloc.Rebase(context.Background(), rowValue, false); err != nil { return nil, errors.Trace(err) @@ -223,12 +222,12 @@ func (en *TableKVEncoder) fillRow(row []types.Datum, hasValue []bool, rowID int6 } if len(en.GenCols) > 0 { - if errCol, err := en.EvalGeneratedColumns(record, en.Columns); err != nil { + if errCol, err := en.EvalGeneratedColumns(row, en.Columns); err != nil { return nil, en.LogEvalGenExprFailed(row, errCol, err) } } - return record, nil + return row, nil } // Close the TableKVEncoder. diff --git a/pkg/lightning/backend/kv/base.go b/pkg/lightning/backend/kv/base.go index 7fb3679468677..60c37af78c07e 100644 --- a/pkg/lightning/backend/kv/base.go +++ b/pkg/lightning/backend/kv/base.go @@ -185,9 +185,9 @@ func NewBaseKVEncoder(config *encode.EncodingConfig) (*BaseKVEncoder, error) { // GetOrCreateRecord returns a record slice from the cache if possible, otherwise creates a new one. func (e *BaseKVEncoder) GetOrCreateRecord() []types.Datum { if e.recordCache != nil { - return e.recordCache + e.recordCache = make([]types.Datum, 0, len(e.Columns)+1) } - return make([]types.Datum, 0, len(e.Columns)+1) + return e.recordCache[:0] } // Record2KV converts a row into a KV pair. @@ -206,7 +206,6 @@ func (e *BaseKVEncoder) Record2KV(record, originalRow []types.Datum, rowID int64 var encoded [9]byte // The max length of encoded int64 is 9. kvPairs.Pairs[i].RowID = codec.EncodeComparableVarint(encoded[:0], rowID) } - e.recordCache = record[:0] return kvPairs, nil } @@ -232,23 +231,8 @@ func (e *BaseKVEncoder) ProcessColDatum(col *table.Column, rowID int64, inputDat return value, err } - if e.IsAutoRandomCol(col.ToInfo()) { - meta := e.table.Meta() - shardFmt := autoid.NewShardIDFormat(&col.FieldType, meta.AutoRandomBits, meta.AutoRandomRangeBits) - // this allocator is the same as the allocator in table importer, i.e. PanickingAllocators. below too. 
- alloc := e.TableAllocators().Get(autoid.AutoRandomType) - if err := alloc.Rebase(context.Background(), value.GetInt64()&shardFmt.IncrementalMask(), false); err != nil { - return value, errors.Trace(err) - } - } - if IsAutoIncCol(col.ToInfo()) { - // same as RowIDAllocType, since SepAutoInc is always false when initializing allocators of Table. - alloc := e.TableAllocators().Get(autoid.AutoIncrementType) - if err := alloc.Rebase(context.Background(), GetAutoRecordID(value, &col.FieldType), false); err != nil { - return value, errors.Trace(err) - } - } - return value, nil + err = e.RebaseAutoID(col, rowID, &value) + return value, err } func (e *BaseKVEncoder) getActualDatum(col *table.Column, rowID int64, inputDatum *types.Datum, needCast bool) (types.Datum, error) { @@ -258,11 +242,9 @@ func (e *BaseKVEncoder) getActualDatum(col *table.Column, rowID int64, inputDatu ) isBadNullValue := false - exprCtx := e.SessionCtx.GetExprCtx() - errCtx := exprCtx.GetEvalCtx().ErrCtx() if inputDatum != nil { if needCast { - value, err = table.CastColumnValue(exprCtx, *inputDatum, col.ToInfo(), false, false) + value, err = table.CastColumnValue(e.SessionCtx.GetExprCtx(), *inputDatum, col.ToInfo(), false, false) if err != nil { return value, err } @@ -274,6 +256,19 @@ func (e *BaseKVEncoder) getActualDatum(col *table.Column, rowID int64, inputDatu } isBadNullValue = true } + + return e.HandleSpecialValue(col, rowID, isBadNullValue) +} + +func (e *BaseKVEncoder) HandleSpecialValue(col *table.Column, rowID int64, isBadNullValue bool) (types.Datum, error) { + var ( + value types.Datum + err error + ) + + exprCtx := e.SessionCtx.GetExprCtx() + errCtx := exprCtx.GetEvalCtx().ErrCtx() + // handle special values switch { case IsAutoIncCol(col.ToInfo()): @@ -301,6 +296,27 @@ func (e *BaseKVEncoder) getActualDatum(col *table.Column, rowID int64, inputDatu return value, err } +// RebaseAutoID rebase the auto id of the table +func (e *BaseKVEncoder) RebaseAutoID(col *table.Column, rowID 
int64, value *types.Datum) error { + if e.IsAutoRandomCol(col.ToInfo()) { + meta := e.table.Meta() + shardFmt := autoid.NewShardIDFormat(&col.FieldType, meta.AutoRandomBits, meta.AutoRandomRangeBits) + // this allocator is the same as the allocator in table importer, i.e. PanickingAllocators. below too. + alloc := e.TableAllocators().Get(autoid.AutoRandomType) + if err := alloc.Rebase(context.Background(), value.GetInt64()&shardFmt.IncrementalMask(), false); err != nil { + return errors.Trace(err) + } + } + if IsAutoIncCol(col.ToInfo()) { + // same as RowIDAllocType, since SepAutoInc is always false when initializing allocators of Table. + alloc := e.TableAllocators().Get(autoid.AutoIncrementType) + if err := alloc.Rebase(context.Background(), GetAutoRecordID(*value, &col.FieldType), false); err != nil { + return errors.Trace(err) + } + } + return nil +} + // IsAutoRandomCol checks if the column is auto random column. func (e *BaseKVEncoder) IsAutoRandomCol(col *model.ColumnInfo) bool { return e.table.Meta().ContainsAutoRandomBits() && col.ID == e.AutoRandomColID diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 689a16b4a9875..ad47a670dcd72 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -70,11 +70,6 @@ const ( KVWriteBatchSize = 16 * units.KiB DefaultRangeConcurrency = 16 - // For TiDB mode, inserting record to table may consume many memory, - // so we set a lower memory limit. 
- defaultMemoryUsageTiDB = 40 - defaultMemoryUsageLocal = 80 - defaultDistSQLScanConcurrency = 15 defaultBuildStatsConcurrency = 20 defaultIndexSerialScanConcurrency = 20 diff --git a/pkg/lightning/mydump/csv_parser.go b/pkg/lightning/mydump/csv_parser.go index 37106ff47f181..c2ed29ececd59 100644 --- a/pkg/lightning/mydump/csv_parser.go +++ b/pkg/lightning/mydump/csv_parser.go @@ -628,6 +628,12 @@ func (parser *CSVParser) replaceEOF(err error, replaced error) error { return replaced } +// ReadRowUnsafe implements the Parser interface. +// TODO(joechenrh): implement unsafe read for CSV parser +func (parser *CSVParser) ReadRowUnsafe() error { + return parser.ReadRow() +} + // ReadRow reads a row from the datafile. func (parser *CSVParser) ReadRow() error { row := &parser.lastRow diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 8bc32ec44f8d7..9518d52ffebad 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -424,10 +424,20 @@ func (pp *ParquetParser) Close() error { return nil } -// ReadRow reads a row in the parquet file by the parser. +func (pp *ParquetParser) ReadRow() error { + if err := pp.ReadRowUnsafe(); err != nil { + return err + } + for i, d := range pp.lastRow.Row { + pp.lastRow.Row[i] = *d.Clone() + } + return nil +} + +// ReadRowUnsafe reads a row in the parquet file by the parser. // It implements the Parser interface. // Return io.EOF if reaching the end of the file. 
-func (pp *ParquetParser) ReadRow() error { +func (pp *ParquetParser) ReadRowUnsafe() error { pp.lastRow.RowID++ pp.lastRow.Length = 0 @@ -663,7 +673,7 @@ func SampleStatisticsFromParquet( totalReadRows := reader.MetaData().RowGroups[0].NumRows for range totalReadRows { - err = parser.ReadRow() + err = parser.ReadRowUnsafe() if err != nil { if errors.Cause(err) == io.EOF { break diff --git a/pkg/lightning/mydump/parquet_type_converter.go b/pkg/lightning/mydump/parquet_type_converter.go index e6fb3ef71e13a..178dc3f132365 100644 --- a/pkg/lightning/mydump/parquet_type_converter.go +++ b/pkg/lightning/mydump/parquet_type_converter.go @@ -188,7 +188,7 @@ func getByteArrayGetter(converted *convertedType) setter[parquet.ByteArray] { switch converted.converted { case schema.ConvertedTypes.None, schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: return func(val parquet.ByteArray, d *types.Datum) { - d.SetString(string(val), "utf8mb4_bin") + d.SetBytesAsString(val, "utf8mb4_bin", uint32(len(val))) } case schema.ConvertedTypes.Decimal: return func(val parquet.ByteArray, d *types.Datum) { @@ -204,7 +204,7 @@ func getFixedLenByteArrayGetter(converted *convertedType) setter[parquet.FixedLe switch converted.converted { case schema.ConvertedTypes.None, schema.ConvertedTypes.BSON, schema.ConvertedTypes.JSON, schema.ConvertedTypes.UTF8, schema.ConvertedTypes.Enum: return func(val parquet.FixedLenByteArray, d *types.Datum) { - d.SetString(string(val), "utf8mb4_bin") + d.SetBytesAsString(val, "utf8mb4_bin", uint32(len(val))) } case schema.ConvertedTypes.Decimal: return func(val parquet.FixedLenByteArray, d *types.Datum) { diff --git a/pkg/lightning/mydump/parser.go b/pkg/lightning/mydump/parser.go index 2de7875e7c898..ddfac8cd6eabb 100644 --- a/pkg/lightning/mydump/parser.go +++ b/pkg/lightning/mydump/parser.go @@ -160,7 +160,15 @@ type Parser interface { // ScannedPos always returns the current file reader pointer's 
location ScannedPos() (int64, error) Close() error + + // ReadRow reads a row from the datafile. ReadRow() error + + // ReadRowUnsafe reads a row from the datafile, + // and the returned Row is only valid until the + // next call to ReadRow or ReadRowUnsafe. + ReadRowUnsafe() error + LastRow() Row RecycleRow(row Row) @@ -393,6 +401,10 @@ func (parser *ChunkParser) unescapeString(input string) string { return input } +func (parser *ChunkParser) ReadRowUnsafe() error { + return parser.ReadRow() +} + // ReadRow reads a row from the datafile. func (parser *ChunkParser) ReadRow() error { // This parser will recognize contents like: diff --git a/pkg/table/column.go b/pkg/table/column.go index 4b333e8d8f0b6..5d1963bec7bb3 100644 --- a/pkg/table/column.go +++ b/pkg/table/column.go @@ -226,14 +226,28 @@ func convertToIncorrectStringErr(err error, colName string) error { return ErrTruncatedWrongValueForField.FastGen("Incorrect string value '%s' for column '%s'", res.String(), colName) } -// handleZeroDatetime handles Timestamp/Datetime/Date zero date and invalid dates. -// Currently only called from CastValue. -// returns: +// handleZeroDatetime handles zero and invalid value for Timestamp/Datetime/Date types. +// Currently it's only used in CastValue. 
+// Returns: // // value (possibly adjusted) // boolean; true if break error/warning handling in CastValue and return what was returned from this // error -func handleZeroDatetime(ec errctx.Context, mode mysql.SQLMode, col *model.ColumnInfo, casted types.Datum, str string, tmIsInvalid bool) (types.Datum, bool, error) { +func handleZeroDatetime( + ec errctx.Context, mode mysql.SQLMode, + col *model.ColumnInfo, + casted, origin types.Datum, + tmIsInvalid bool, +) (types.Datum, bool, error) { + getStr := func() string { + str, err1 := origin.ToString() + if err1 != nil { + logutil.BgLogger().Warn("Datum ToString failed", zap.Stringer("Datum", origin), zap.Error(err1)) + str = origin.GetString() + } + return str + } + tm := casted.GetMysqlTime() var ( @@ -272,7 +286,7 @@ func handleZeroDatetime(ec errctx.Context, mode mysql.SQLMode, col *model.Column // * **ST**: STRICT_TRANS_TABLES // * **ELSE**: empty or NO_ZERO_IN_DATE_MODE if tm.IsZero() && col.GetType() == mysql.TypeTimestamp { - innerErr := types.ErrWrongValue.FastGenByArgs(zeroT, str) + innerErr := types.ErrWrongValue.FastGenByArgs(zeroT, getStr()) if mode.HasStrictMode() && !ignoreErr && (tmIsInvalid || mode.HasNoZeroDateMode()) { return types.NewDatum(zeroV), true, errors.Trace(innerErr) } @@ -283,7 +297,7 @@ func handleZeroDatetime(ec errctx.Context, mode mysql.SQLMode, col *model.Column return types.NewDatum(zeroV), true, nil } else if tmIsInvalid && col.GetType() == mysql.TypeTimestamp { // Prevent from being stored! Invalid timestamp! 
- warn := types.ErrWrongValue.FastGenByArgs(zeroT, str) + warn := types.ErrWrongValue.FastGenByArgs(zeroT, getStr()) if mode.HasStrictMode() { return types.NewDatum(zeroV), true, errors.Trace(warn) } @@ -302,7 +316,7 @@ func handleZeroDatetime(ec errctx.Context, mode mysql.SQLMode, col *model.Column } } - innerErr := types.ErrWrongValue.FastGenByArgs(zeroT, str) + innerErr := types.ErrWrongValue.FastGenByArgs(zeroT, getStr()) if mode.HasStrictMode() && !ignoreErr { return types.NewDatum(zeroV), true, errors.Trace(innerErr) } @@ -351,12 +365,7 @@ func castColumnValue(tc types.Context, ec errctx.Context, sqlMode mysql.SQLMode, err = types.ErrTruncatedWrongVal.GenWithStackByArgs(col.FieldType.CompactStr(), str) } else if !casted.IsNull() && (col.GetType() == mysql.TypeDate || col.GetType() == mysql.TypeDatetime || col.GetType() == mysql.TypeTimestamp) { - str, err1 := val.ToString() - if err1 != nil { - logutil.BgLogger().Warn("Datum ToString failed", zap.Stringer("Datum", val), zap.Error(err1)) - str = val.GetString() - } - if innCasted, exit, innErr := handleZeroDatetime(ec, sqlMode, col, casted, str, types.ErrWrongValue.Equal(err)); exit { + if innCasted, exit, innErr := handleZeroDatetime(ec, sqlMode, col, casted, val, types.ErrWrongValue.Equal(err)); exit { return innCasted, innErr } } else if err != nil && charset.ErrInvalidCharacterString.Equal(err) { From 892ff96273d66c608fb8b489cf2840b60bfe08d6 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Wed, 3 Sep 2025 23:19:56 -0400 Subject: [PATCH 87/93] update allocator Signed-off-by: Ruihao Chen --- DEPS.bzl | 194 ++++++++++------ lightning/pkg/importer/dup_detect.go | 1 - lightning/pkg/importer/import.go | 6 - lightning/pkg/importer/table_import.go | 1 - .../importinto/encode_and_sort_operator.go | 18 +- pkg/disttask/importinto/subtask_executor.go | 5 +- pkg/disttask/importinto/task_executor.go | 18 +- pkg/executor/importer/BUILD.bazel | 1 + pkg/executor/importer/chunk_process.go | 7 +- 
pkg/executor/importer/import.go | 7 +- pkg/executor/importer/table_import.go | 7 +- pkg/lightning/backend/kv/sql2kv.go | 1 - pkg/lightning/config/config.go | 10 - pkg/lightning/mydump/BUILD.bazel | 22 +- pkg/lightning/mydump/allocator.go | 213 ++++++------------ pkg/lightning/mydump/allocator_test.go | 12 +- pkg/lightning/mydump/append_only_allocator.go | 167 ++++++++++++++ ...{simple_allocator.go => list_allocator.go} | 159 +++++++++---- pkg/lightning/mydump/loader.go | 1 + pkg/lightning/mydump/loader_test.go | 2 +- pkg/lightning/mydump/parquet_parser.go | 36 ++- .../mydump/parquet_type_converter.go | 20 +- tools/gen-parquet/BUILD.bazel | 8 +- 23 files changed, 557 insertions(+), 359 deletions(-) create mode 100644 pkg/lightning/mydump/append_only_allocator.go rename pkg/lightning/mydump/{simple_allocator.go => list_allocator.go} (50%) diff --git a/DEPS.bzl b/DEPS.bzl index e15bb868ccdb0..5ae139d851c8a 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -160,19 +160,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/alecthomas/kingpin/v2/com_github_alecthomas_kingpin_v2-v2.4.0.zip", ], ) - go_repository( - name = "com_github_alecthomas_participle_v2", - build_file_proto_mode = "disable_global", - importpath = "github.com/alecthomas/participle/v2", - sha256 = "257ab6b73198005370511b9677004134374f41464eb3731298c38c1b768b1218", - strip_prefix = "github.com/alecthomas/participle/v2@v2.1.0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", - "http://ats.apps.svc/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/alecthomas/participle/v2/com_github_alecthomas_participle_v2-v2.1.0.zip", - ], - ) go_repository( name = 
"com_github_alecthomas_units", build_file_proto_mode = "disable_global", @@ -303,6 +290,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/antihax/optional/com_github_antihax_optional-v1.0.0.zip", ], ) + go_repository( + name = "com_github_antlr4_go_antlr_v4", + build_file_proto_mode = "disable_global", + importpath = "github.com/antlr4-go/antlr/v4", + sha256 = "2fb455ab1f46e53a3cfa678306a6e405f291b34858222db9240335c018ae9554", + strip_prefix = "github.com/antlr4-go/antlr/v4@v4.13.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/antlr4-go/antlr/v4/com_github_antlr4_go_antlr_v4-v4.13.1.zip", + "http://ats.apps.svc/gomod/github.com/antlr4-go/antlr/v4/com_github_antlr4_go_antlr_v4-v4.13.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/antlr4-go/antlr/v4/com_github_antlr4_go_antlr_v4-v4.13.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/antlr4-go/antlr/v4/com_github_antlr4_go_antlr_v4-v4.13.1.zip", + ], + ) go_repository( name = "com_github_antonboom_errname", build_file_proto_mode = "disable_global", @@ -342,6 +342,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/Antonboom/testifylint/com_github_antonboom_testifylint-v1.6.1.zip", ], ) + go_repository( + name = "com_github_apache_arrow_go_v18", + build_file_proto_mode = "disable_global", + importpath = "github.com/apache/arrow-go/v18", + sha256 = "af71f087f5777b1d33c7c365780d92ede1301925bcb31b408929ce460678ee4a", + strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250901051834-4df8b8d27fe9", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", + 
"https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", + ], + ) go_repository( name = "com_github_apache_skywalking_eyes", build_file_proto_mode = "disable_global", @@ -459,6 +472,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/ashanbrown/makezero/v2/com_github_ashanbrown_makezero_v2-v2.0.1.zip", ], ) + go_repository( + name = "com_github_atomicgo_cursor", + build_file_proto_mode = "disable_global", + importpath = "github.com/atomicgo/cursor", + sha256 = "b77665289a1a6dc750cf3752537e5b5e67abdeb3e98547ce59d2eef210bf36cf", + strip_prefix = "github.com/atomicgo/cursor@v0.0.1", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/atomicgo/cursor/com_github_atomicgo_cursor-v0.0.1.zip", + "http://ats.apps.svc/gomod/github.com/atomicgo/cursor/com_github_atomicgo_cursor-v0.0.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/atomicgo/cursor/com_github_atomicgo_cursor-v0.0.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/atomicgo/cursor/com_github_atomicgo_cursor-v0.0.1.zip", + ], + ) go_repository( name = "com_github_aws_aws_sdk_go", build_file_proto_mode = "disable_global", @@ -1174,6 +1200,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cncf/xds/go/com_github_cncf_xds_go-v0.0.0-20231128003011-0fa0005c9caa.zip", ], ) + go_repository( + name = "com_github_cockroachdb_apd_v3", + build_file_proto_mode = "disable_global", + importpath = "github.com/cockroachdb/apd/v3", + sha256 = "6ad54bb71a36fba8ca6725a00d916e51815a4c68de54096313ca6fffda6c87c2", + strip_prefix = "github.com/cockroachdb/apd/v3@v3.2.1", + urls = [ + 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/cockroachdb/apd/v3/com_github_cockroachdb_apd_v3-v3.2.1.zip", + "http://ats.apps.svc/gomod/github.com/cockroachdb/apd/v3/com_github_cockroachdb_apd_v3-v3.2.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/cockroachdb/apd/v3/com_github_cockroachdb_apd_v3-v3.2.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/cockroachdb/apd/v3/com_github_cockroachdb_apd_v3-v3.2.1.zip", + ], + ) go_repository( name = "com_github_cockroachdb_datadriven", build_file_proto_mode = "disable_global", @@ -2738,13 +2777,13 @@ def go_deps(): name = "com_github_goccy_go_yaml", build_file_proto_mode = "disable_global", importpath = "github.com/goccy/go-yaml", - sha256 = "13a7174686c1e9a053a29c848016fb2ed7a39b6befea6db085e8b5d51990d0ee", - strip_prefix = "github.com/goccy/go-yaml@v1.11.0", + sha256 = "d696eddbab891896b3a70334d04cfb0208cdc0e18544bebdf95926e1f64df310", + strip_prefix = "github.com/goccy/go-yaml@v1.17.1", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", - "http://ats.apps.svc/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.11.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.17.1.zip", + "http://ats.apps.svc/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.17.1.zip", + "https://cache.hawkingrei.com/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.17.1.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/goccy/go-yaml/com_github_goccy_go_yaml-v1.17.1.zip", ], ) go_repository( @@ -3345,6 +3384,19 @@ def go_deps(): 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/googleapis/gax-go/v2/com_github_googleapis_gax_go_v2-v2.12.3.zip", ], ) + go_repository( + name = "com_github_gookit_color", + build_file_proto_mode = "disable_global", + importpath = "github.com/gookit/color", + sha256 = "c5295c810538f77bebb2cf9e34cdfa92adfdec75486234358956ebe877e685f9", + strip_prefix = "github.com/gookit/color@v1.5.4", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/gookit/color/com_github_gookit_color-v1.5.4.zip", + "http://ats.apps.svc/gomod/github.com/gookit/color/com_github_gookit_color-v1.5.4.zip", + "https://cache.hawkingrei.com/gomod/github.com/gookit/color/com_github_gookit_color-v1.5.4.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/gookit/color/com_github_gookit_color-v1.5.4.zip", + ], + ) go_repository( name = "com_github_gophercloud_gophercloud", build_file_proto_mode = "disable_global", @@ -4130,19 +4182,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/jmespath/go-jmespath/internal/testify/com_github_jmespath_go_jmespath_internal_testify-v1.5.1.zip", ], ) - go_repository( - name = "com_github_joechenrh_arrow_go_v18", - build_file_proto_mode = "disable_global", - importpath = "github.com/joechenrh/arrow-go/v18", - sha256 = "801a70a732e926caee0cf27b99c95267e6fa7d99deec1e64210d014bd58ab0ae", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250305032250-07d568e83cc0", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250305032250-07d568e83cc0.zip", - ], - ) go_repository( name = "com_github_johannesboyne_gofakes3", build_file_proto_mode = "disable_global", @@ -4156,19 +4195,6 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/johannesboyne/gofakes3/com_github_johannesboyne_gofakes3-v0.0.0-20230506070712-04da935ef877.zip", ], ) - go_repository( - name = "com_github_johncgriffin_overflow", - build_file_proto_mode = "disable_global", - importpath = "github.com/JohnCGriffin/overflow", - sha256 = "8ad4da840214861386d243127290666cc54eb914d1f4a8856523481876af2a09", - strip_prefix = "github.com/JohnCGriffin/overflow@v0.0.0-20211019200055-46fa312c352c", - urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/JohnCGriffin/overflow/com_github_johncgriffin_overflow-v0.0.0-20211019200055-46fa312c352c.zip", - "http://ats.apps.svc/gomod/github.com/JohnCGriffin/overflow/com_github_johncgriffin_overflow-v0.0.0-20211019200055-46fa312c352c.zip", - "https://cache.hawkingrei.com/gomod/github.com/JohnCGriffin/overflow/com_github_johncgriffin_overflow-v0.0.0-20211019200055-46fa312c352c.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/JohnCGriffin/overflow/com_github_johncgriffin_overflow-v0.0.0-20211019200055-46fa312c352c.zip", - ], - ) go_repository( name = "com_github_joho_sqltocsv", build_file_proto_mode = "disable_global", @@ -4953,6 +4979,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/maratori/testpackage/com_github_maratori_testpackage-v1.1.1.zip", ], ) + go_repository( + name = "com_github_marvinjwendt_testza", + build_file_proto_mode = "disable_global", + importpath = "github.com/MarvinJWendt/testza", + sha256 = "fd9b3bca02ff1677bf933a2dafa50b4a9eb5118fe793de704af6e1b4b57162e1", + strip_prefix = "github.com/MarvinJWendt/testza@v0.4.2", + urls = [ + 
"http://bazel-cache.pingcap.net:8080/gomod/github.com/MarvinJWendt/testza/com_github_marvinjwendt_testza-v0.4.2.zip", + "http://ats.apps.svc/gomod/github.com/MarvinJWendt/testza/com_github_marvinjwendt_testza-v0.4.2.zip", + "https://cache.hawkingrei.com/gomod/github.com/MarvinJWendt/testza/com_github_marvinjwendt_testza-v0.4.2.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/MarvinJWendt/testza/com_github_marvinjwendt_testza-v0.4.2.zip", + ], + ) go_repository( name = "com_github_masterminds_goutils", build_file_proto_mode = "disable_global", @@ -6136,6 +6175,19 @@ def go_deps(): "https://storage.googleapis.com/pingcapmirror/gomod/github.com/prometheus/prometheus/com_github_prometheus_prometheus-v0.50.1.zip", ], ) + go_repository( + name = "com_github_pterm_pterm", + build_file_proto_mode = "disable_global", + importpath = "github.com/pterm/pterm", + sha256 = "846d01170c0383aceaf576d9ed864b6c27998a0e2ba5e09d046e56b921bb735d", + strip_prefix = "github.com/pterm/pterm@v0.12.40", + urls = [ + "http://bazel-cache.pingcap.net:8080/gomod/github.com/pterm/pterm/com_github_pterm_pterm-v0.12.40.zip", + "http://ats.apps.svc/gomod/github.com/pterm/pterm/com_github_pterm_pterm-v0.12.40.zip", + "https://cache.hawkingrei.com/gomod/github.com/pterm/pterm/com_github_pterm_pterm-v0.12.40.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/pterm/pterm/com_github_pterm_pterm-v0.12.40.zip", + ], + ) go_repository( name = "com_github_qri_io_jsonpointer", build_file_proto_mode = "disable_global", @@ -6894,26 +6946,26 @@ def go_deps(): name = "com_github_substrait_io_substrait", build_file_proto_mode = "disable_global", importpath = "github.com/substrait-io/substrait", - sha256 = "c0f97dde3d195992c937764f322406500d12b7e197ca3eae581b4f9369ca22f5", - strip_prefix = "github.com/substrait-io/substrait@v0.57.1", + sha256 = "fbc11abf7516a4650fb3fdc4f1fd2314dfa92260885022f96d7a3f69cf708b13", + strip_prefix = "github.com/substrait-io/substrait@v0.69.0", 
urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", - "http://ats.apps.svc/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", - "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.57.1.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.69.0.zip", + "http://ats.apps.svc/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.69.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.69.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait/com_github_substrait_io_substrait-v0.69.0.zip", ], ) go_repository( - name = "com_github_substrait_io_substrait_go", + name = "com_github_substrait_io_substrait_go_v3", build_file_proto_mode = "disable_global", - importpath = "github.com/substrait-io/substrait-go", - sha256 = "fd4e19b47316b161bca0f9c10da86a16db760954b40c822bdf601b6a33d8a2e0", - strip_prefix = "github.com/substrait-io/substrait-go@v1.2.0", + importpath = "github.com/substrait-io/substrait-go/v3", + sha256 = "e24bdbfea7ee3147f7f21d9cca6ce0fca0faeebec320e3c1d58055a58a953b01", + strip_prefix = "github.com/substrait-io/substrait-go/v3@v3.3.0", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", - "http://ats.apps.svc/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", - "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", - 
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait-go/com_github_substrait_io_substrait_go-v1.2.0.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/substrait-io/substrait-go/v3/com_github_substrait_io_substrait_go_v3-v3.3.0.zip", + "http://ats.apps.svc/gomod/github.com/substrait-io/substrait-go/v3/com_github_substrait_io_substrait_go_v3-v3.3.0.zip", + "https://cache.hawkingrei.com/gomod/github.com/substrait-io/substrait-go/v3/com_github_substrait_io_substrait_go_v3-v3.3.0.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/substrait-io/substrait-go/v3/com_github_substrait_io_substrait_go_v3-v3.3.0.zip", ], ) go_repository( @@ -10042,13 +10094,13 @@ def go_deps(): name = "org_golang_google_genproto_googleapis_rpc", build_file_proto_mode = "disable_global", importpath = "google.golang.org/genproto/googleapis/rpc", - sha256 = "798f4e9522193634403993f16fa73824d58282efbda9f48805071dab5008154f", - strip_prefix = "google.golang.org/genproto/googleapis/rpc@v0.0.0-20241104194629-dd2ea8efbc28", + sha256 = "038240b4d232cd89297c81c05553912a5205d034cd2d807d7448b9482c6e7843", + strip_prefix = "google.golang.org/genproto/googleapis/rpc@v0.0.0-20250425173222-7b384671a197", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", - "http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", - "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20241104194629-dd2ea8efbc28.zip", + 
"http://bazel-cache.pingcap.net:8080/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20250425173222-7b384671a197.zip", + "http://ats.apps.svc/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20250425173222-7b384671a197.zip", + "https://cache.hawkingrei.com/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20250425173222-7b384671a197.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/google.golang.org/genproto/googleapis/rpc/org_golang_google_genproto_googleapis_rpc-v0.0.0-20250425173222-7b384671a197.zip", ], ) go_repository( @@ -10341,13 +10393,13 @@ def go_deps(): name = "org_golang_x_xerrors", build_file_proto_mode = "disable_global", importpath = "golang.org/x/xerrors", - sha256 = "df5dd109153c94d2f5c9601d28f558871094e37c42f8e3875f36db858d8be9f9", - strip_prefix = "golang.org/x/xerrors@v0.0.0-20231012003039-104605ab7028", + sha256 = "07ee9f680118861ee732ce0df4553b834383b87e0519fb9a0990c51d7abd6885", + strip_prefix = "golang.org/x/xerrors@v0.0.0-20240903120638-7835f813f4da", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20231012003039-104605ab7028.zip", - "http://ats.apps.svc/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20231012003039-104605ab7028.zip", - "https://cache.hawkingrei.com/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20231012003039-104605ab7028.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20231012003039-104605ab7028.zip", + "http://bazel-cache.pingcap.net:8080/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20240903120638-7835f813f4da.zip", + "http://ats.apps.svc/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20240903120638-7835f813f4da.zip", + 
"https://cache.hawkingrei.com/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20240903120638-7835f813f4da.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/golang.org/x/xerrors/org_golang_x_xerrors-v0.0.0-20240903120638-7835f813f4da.zip", ], ) go_repository( diff --git a/lightning/pkg/importer/dup_detect.go b/lightning/pkg/importer/dup_detect.go index 8539c24653fea..e5167246a93d9 100644 --- a/lightning/pkg/importer/dup_detect.go +++ b/lightning/pkg/importer/dup_detect.go @@ -203,7 +203,6 @@ func (d *dupDetector) addKeysByChunk( adder *duplicate.KeyAdder, chunk *checkpoints.ChunkCheckpoint, ) error { - chunk.FileMeta.ParquetMeta.UseStreaming = true parser, err := openParser(ctx, d.rc.cfg, chunk, d.rc.ioWorkers, d.rc.store, d.tr.tableInfo.Core) if err != nil { return err diff --git a/lightning/pkg/importer/import.go b/lightning/pkg/importer/import.go index b2dd2568803d0..5e460ab09b3fd 100644 --- a/lightning/pkg/importer/import.go +++ b/lightning/pkg/importer/import.go @@ -547,8 +547,6 @@ func (rc *Controller) Close() { func (rc *Controller) Run(ctx context.Context) error { failpoint.Inject("beforeRun", func() {}) - mydump.ConfigureReaderLimitForParquet(rc.cfg.App.MaxMemoryUsage) - opts := []func(context.Context) error{ rc.setGlobalVariables, rc.restoreSchema, @@ -1548,10 +1546,6 @@ func (rc *Controller) importTables(ctx context.Context) (finalErr error) { default: } - // All tables are read, we can free memory used for parquet. 
- logTask.Info("Read table done, free memory and call GC") - mydump.ReleaseMemoryForParquet() - postProgress = func() error { close(postProcessTaskChan) // otherwise, we should run all tasks in the post-process task chan diff --git a/lightning/pkg/importer/table_import.go b/lightning/pkg/importer/table_import.go index c4ecfe0aa21fc..f837983c6aadb 100644 --- a/lightning/pkg/importer/table_import.go +++ b/lightning/pkg/importer/table_import.go @@ -780,7 +780,6 @@ ChunkLoop: setError(err) break } - cr, err := newChunkProcessor(ctx, chunkIndex, rc.cfg, chunk, rc.ioWorkers, rc.store, tr.tableInfo.Core) if err != nil { setError(err) diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 0ddea1c487c94..8caa9960fea7e 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -53,6 +53,7 @@ type encodeAndSortOperator struct { ctx context.Context cancel context.CancelFunc collector execute.Collector + pool *mydump.Pool taskID, subtaskID int64 tableImporter *importer.TableImporter @@ -72,12 +73,14 @@ func newEncodeAndSortOperator( collector execute.Collector, subtaskID int64, concurrency int, + memPool *mydump.Pool, ) *encodeAndSortOperator { subCtx, cancel := context.WithCancel(ctx) op := &encodeAndSortOperator{ ctx: subCtx, cancel: cancel, collector: collector, + pool: memPool, taskID: executor.taskID, subtaskID: subtaskID, tableImporter: executor.tableImporter, @@ -198,7 +201,7 @@ func (w *chunkWorker) HandleTask(task *importStepMinimalTask, _ func(workerpool. // we don't use the input send function, it makes workflow more complex // we send result to errCh and handle it here. 
executor := newImportMinimalTaskExecutor(task) - if err := executor.Run(w.ctx, w.dataWriter, w.indexWriter, w.op.collector); err != nil { + if err := executor.Run(w.ctx, w.dataWriter, w.indexWriter, w.op.collector, w.op.pool); err != nil { w.op.onError(err) } } @@ -238,15 +241,12 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) threadCnt = plan.EncodeThreadCnt } + // We use a portion of the total available memory for data writer, which is depended + // on the data format, and the other half for encoding and other stuffs, it's an + // experience value, might not optimal. memPerCon := resource.Mem.Capacity() / int64(threadCnt) + memForWriter := mydump.GetMemoryForWriter(plan.Format, int(memPerCon)) - // For parquet format, we allocate 40% of the memory to file reader. - if plan.Format == importer.DataFormatParquet { - memPerCon = memPerCon * (100 - mydump.ImportIntoReaderUsage) / 100 - } - - // we use half of the total available memory for data writer, and the other half - // for encoding and other stuffs, it's an experience value, might not optimal. // Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer // takes 3 shares, and each index KV writer takes 1 share. 
// suppose we have memPerCon = 2G @@ -256,7 +256,7 @@ func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan) // | 1 | 768/256 MiB | // | 5 | 384/128 MiB | // | 13 | 192/64 MiB | - memPerShare := float64(memPerCon) / 2 / float64(indexKVGroupCnt+3) + memPerShare := float64(memForWriter) / float64(indexKVGroupCnt+3) return uint64(memPerShare * 3), uint64(memPerShare) } diff --git a/pkg/disttask/importinto/subtask_executor.go b/pkg/disttask/importinto/subtask_executor.go index 2c330973932b9..b52e96d5f78c9 100644 --- a/pkg/disttask/importinto/subtask_executor.go +++ b/pkg/disttask/importinto/subtask_executor.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/pkg/lightning/backend/local" "github.com/pingcap/tidb/pkg/lightning/checkpoints" "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/lightning/mydump" verify "github.com/pingcap/tidb/pkg/lightning/verification" "github.com/pingcap/tidb/pkg/resourcegroup" "github.com/pingcap/tidb/pkg/sessionctx" @@ -40,7 +41,7 @@ import ( // MiniTaskExecutor is the interface for a minimal task executor. // exported for testing. type MiniTaskExecutor interface { - Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter, collector execute.Collector) error + Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter, collector execute.Collector, pool *mydump.Pool) error } // importMinimalTaskExecutor is a minimal task executor for IMPORT INTO. 
@@ -60,6 +61,7 @@ func (e *importMinimalTaskExecutor) Run( ctx context.Context, dataWriter, indexWriter backend.EngineWriter, collector execute.Collector, + pool *mydump.Pool, ) error { logger := logutil.BgLogger().With(zap.Stringer("type", proto.ImportInto), zap.Int64("table-id", e.mTtask.Plan.TableInfo.ID)) logger.Info("execute chunk") @@ -69,6 +71,7 @@ func (e *importMinimalTaskExecutor) Run( }) failpoint.InjectCall("syncBeforeSortChunk") chunkCheckpoint := toChunkCheckpoint(e.mTtask.Chunk) + chunkCheckpoint.FileMeta.ParquetMeta.MemoryPool = pool sharedVars := e.mTtask.SharedVars checksum := verify.NewKVGroupChecksumWithKeyspace(sharedVars.TableImporter.GetKeySpace()) if sharedVars.TableImporter.IsLocalSort() { diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index c14ae8d1d0ef6..a7a9a254cc26e 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -76,6 +76,7 @@ type importStepExecutor struct { wg sync.WaitGroup summary execute.SubtaskSummary + memPool *mydump.Pool } func getTableImporter( @@ -113,11 +114,8 @@ func (s *importStepExecutor) Init(ctx context.Context) error { } s.tableImporter = tableImporter - if s.taskMeta.Plan.Format == importer.DataFormatParquet { - mydump.ConfigureReaderLimitForParquet(mydump.ImportIntoReaderUsage) - if s.tableImporter.EncodeThreadCnt > 0 { - s.tableImporter.Plan.ThreadCnt = s.tableImporter.EncodeThreadCnt - } + if s.taskMeta.Plan.Format == importer.DataFormatParquet && s.tableImporter.EncodeThreadCnt > 0 { + s.tableImporter.Plan.ThreadCnt = s.tableImporter.EncodeThreadCnt } // we need this sub context since Cleanup which wait on this routine is called @@ -199,8 +197,12 @@ func (s *importStepExecutor) RunSubtask(ctx context.Context, subtask *proto.Subt } s.sharedVars.Store(subtaskMeta.ID, sharedVars) + if s.memPool == nil { + s.memPool = mydump.GetPool(int(s.GetResource().Mem.Capacity())) + } + source := 
operator.NewSimpleDataChannel(make(chan *importStepMinimalTask)) - op := newEncodeAndSortOperator(ctx, s, sharedVars, s, subtask.ID, int(s.GetResource().CPU.Capacity())) + op := newEncodeAndSortOperator(ctx, s, sharedVars, s, subtask.ID, int(s.GetResource().CPU.Capacity()), s.memPool) op.SetSource(source) pipeline := operator.NewAsyncPipeline(op) if err = pipeline.Execute(); err != nil { @@ -311,10 +313,6 @@ func (s *importStepExecutor) onFinished(ctx context.Context, subtask *proto.Subt } func (s *importStepExecutor) Cleanup(_ context.Context) (err error) { - if s.taskMeta.Plan.Format == importer.DataFormatParquet { - mydump.ReleaseMemoryForParquet() - } - s.logger.Info("cleanup subtask env") s.importCancel() s.wg.Wait() diff --git a/pkg/executor/importer/BUILD.bazel b/pkg/executor/importer/BUILD.bazel index 3313db4bffbc2..bf4dd85a12fc5 100644 --- a/pkg/executor/importer/BUILD.bazel +++ b/pkg/executor/importer/BUILD.bazel @@ -76,6 +76,7 @@ go_library( "//pkg/util/sqlkiller", "//pkg/util/stringutil", "//pkg/util/syncutil", + "//pkg/util/timeutil", "@com_github_docker_go_units//:go-units", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", diff --git a/pkg/executor/importer/chunk_process.go b/pkg/executor/importer/chunk_process.go index bdbdb163d8a2a..88ae49a2b3460 100644 --- a/pkg/executor/importer/chunk_process.go +++ b/pkg/executor/importer/chunk_process.go @@ -306,8 +306,11 @@ func (p *chunkEncoder) encodeLoop(ctx context.Context) error { p.collector.Add(delta, int64(rowCount)) } - avgRowSize := delta / int64(rowCount) - rowBatchSize := MinDeliverBytes * 3 / 2 / uint64(avgRowSize) + rowBatchSize := MinDeliverRowCnt + if delta > int64(rowCount) { + avgRowSize := delta / int64(rowCount) + rowBatchSize = min(MinDeliverRowCnt, int(MinDeliverBytes)*3/2/int(avgRowSize)) + } // the ownership of rowBatch is transferred to the receiver of sendFn, we should // not touch it anymore. 
diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index ee89f9ef30fe4..7cad4ad15388a 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -1299,8 +1299,9 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { + // We may not be able to open ThreadCnt files concurrently due to memory usage _, memoryUsage, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) - encodeThreadCnt := mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) + e.Plan.EncodeThreadCnt = mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) if err != nil { return errors.Trace(err) @@ -1311,10 +1312,6 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { MemoryUsage: memoryUsage, } } - - // Because we may not be able to open ThreadCnt files concurrently, - // we can adjust thread count for parquet here. 
- e.Plan.EncodeThreadCnt = encodeThreadCnt } failpoint.Label("afterReadFiles") diff --git a/pkg/executor/importer/table_import.go b/pkg/executor/importer/table_import.go index a4e1e9197ecaf..21402e03a5ff0 100644 --- a/pkg/executor/importer/table_import.go +++ b/pkg/executor/importer/table_import.go @@ -471,12 +471,7 @@ func (e *LoadDataController) PopulateChunks(ctx context.Context) (chunksMap map[ func (ti *TableImporter) getTotalRawFileSize(indexCnt int64) int64 { var totalSize int64 for _, file := range ti.dataFiles { - size := file.RealSize - if file.Type == mydump.SourceTypeParquet { - // parquet file is compressed, thus estimates with a factor of 2 - size *= 2 - } - totalSize += size + totalSize += file.RealSize } return totalSize * indexCnt } diff --git a/pkg/lightning/backend/kv/sql2kv.go b/pkg/lightning/backend/kv/sql2kv.go index e89d59ee7c5ca..31666a97fa947 100644 --- a/pkg/lightning/backend/kv/sql2kv.go +++ b/pkg/lightning/backend/kv/sql2kv.go @@ -240,7 +240,6 @@ func (kvcodec *tableKVEncoder) Encode(row []types.Datum, } if common.TableHasAutoRowID(kvcodec.table.Meta()) { - var value types.Datum rowValue := rowID j := columnPermutation[len(kvcodec.Columns)] if j >= 0 && j < len(row) { diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index ad47a670dcd72..87a1290c30fc4 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -334,9 +334,6 @@ type Lightning struct { CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"` - // max memory used for memory arena used for parquet file - MaxMemoryUsage int `toml:"max-memory-usage" json:"max-memory-usage"` - MaxError MaxError `toml:"max-error" json:"max-error"` // deprecated, use Conflict.MaxRecordRows instead MaxErrorRecords int64 `toml:"max-error-records" json:"max-error-records"` @@ -354,9 +351,6 @@ func (l *Lightning) adjust(i *TikvImporter) { if l.IndexConcurrency 
== 0 { l.IndexConcurrency = l.RegionConcurrency } - if l.MaxMemoryUsage == 0 { - l.MaxMemoryUsage = defaultMemoryUsageTiDB - } case BackendLocal: if l.IndexConcurrency == 0 { l.IndexConcurrency = defaultIndexConcurrency @@ -364,9 +358,6 @@ func (l *Lightning) adjust(i *TikvImporter) { if l.TableConcurrency == 0 { l.TableConcurrency = DefaultTableConcurrency } - if l.MaxMemoryUsage == 0 { - l.MaxMemoryUsage = defaultMemoryUsageLocal - } if len(l.MetaSchemaName) == 0 { l.MetaSchemaName = defaultMetaSchemaName } @@ -1459,7 +1450,6 @@ func NewConfig() *Config { RegionConcurrency: runtime.NumCPU(), TableConcurrency: 0, IndexConcurrency: 0, - MaxMemoryUsage: 40, IOConcurrency: 5, CheckRequirements: true, TaskInfoSchemaName: defaultTaskInfoSchemaName, diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index d4a4e0d2181ae..654b90c2c69b2 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -4,11 +4,14 @@ go_library( name = "mydump", srcs = [ "allocator.go", + "append_only_allocator.go", "bytes.go", "charset_convertor.go", "csv_parser.go", + "list_allocator.go", "loader.go", "parquet_parser.go", + "parquet_type_converter.go", "parquet_writer.go", "parser.go", "parser_generated.go", @@ -16,7 +19,6 @@ go_library( "region.go", "router.go", "schema_import.go", - "simple_allocator.go", ], importpath = "github.com/pingcap/tidb/pkg/lightning/mydump", visibility = ["//visibility:public"], @@ -27,7 +29,6 @@ go_library( "//pkg/lightning/common", "//pkg/lightning/config", "//pkg/lightning/log", - "//pkg/lightning/membuf", "//pkg/lightning/metric", "//pkg/lightning/worker", "//pkg/parser", @@ -37,6 +38,7 @@ go_library( "//pkg/parser/mysql", "//pkg/types", "//pkg/util", + "//pkg/util/cpu", "//pkg/util/filter", "//pkg/util/intest", "//pkg/util/logutil", @@ -46,16 +48,16 @@ go_library( "//pkg/util/slice", "//pkg/util/sqlescape", "//pkg/util/table-filter", + "//pkg/util/timeutil", "//pkg/util/zeropool", + 
"@com_github_apache_arrow_go_v18//arrow/memory", + "@com_github_apache_arrow_go_v18//parquet", + "@com_github_apache_arrow_go_v18//parquet/compress", + "@com_github_apache_arrow_go_v18//parquet/file", + "@com_github_apache_arrow_go_v18//parquet/schema", "@com_github_go_sql_driver_mysql//:mysql", - "@com_github_joechenrh_arrow_go_v18//arrow/memory", - "@com_github_joechenrh_arrow_go_v18//parquet", - "@com_github_joechenrh_arrow_go_v18//parquet/compress", - "@com_github_joechenrh_arrow_go_v18//parquet/file", - "@com_github_joechenrh_arrow_go_v18//parquet/schema", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", - "@com_github_pingcap_log//:log", "@com_github_spkg_bom//:bom", "@org_golang_x_sync//errgroup", "@org_golang_x_text//encoding", @@ -106,10 +108,10 @@ go_test( "//pkg/util/logutil", "//pkg/util/table-filter", "//pkg/util/table-router", + "@com_github_apache_arrow_go_v18//parquet", + "@com_github_apache_arrow_go_v18//parquet/schema", "@com_github_data_dog_go_sqlmock//:go-sqlmock", "@com_github_go_sql_driver_mysql//:mysql", - "@com_github_joechenrh_arrow_go_v18//parquet", - "@com_github_joechenrh_arrow_go_v18//parquet/schema", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_stretchr_testify//assert", diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index 4c771d32fa0a4..b662dd974fbe9 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -15,113 +15,94 @@ package mydump import ( - "math" - "runtime" - "runtime/debug" - "sync" "unsafe" - "github.com/apache/arrow-go/v18/arrow/memory" - "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/lightning/membuf" + "github.com/pingcap/tidb/pkg/util/cpu" tidbmemory "github.com/pingcap/tidb/pkg/util/memory" - "go.uber.org/zap" ) -// ImportIntoReaderUsage is the percentage of memory usage for parquet reader -const ImportIntoReaderUsage = 40 - var ( - // size of each arena 
- defaultArenaSize = 256 << 20 + // arenaSize is the size of each arena + arenaSize = 64 << 20 - // memory limit for parquet reader - readerMemoryLimit int - readerMemoryLimiter *membuf.Limiter + // parserMemoryPercent defines the percentage of memory used for parser + parserMemoryPercent = 0.3 - // globalPool is used for all parquet import tasks. - // We use importCount to track and release memory. - lk sync.Mutex - globalPool *membuf.Pool - importCount int + // parquetWriterPercent defines the percentage of memory used for parquet writer + parquetWriterPercent = 0.4 - // GetArena creates a new arena - GetArena func(*membuf.Buffer) arena + // otherWriterPercent defines the percentage of memory used for csv writer + otherWriterPercent = 0.5 ) -// ConfigureReaderLimitForParquet set the memory limit for parquet reader. -// Remember to call ReleaseMemoryForParquet to free the memory. -func ConfigureReaderLimitForParquet(percent int) { - lk.Lock() - defer lk.Unlock() - - importCount++ - if importCount > 1 { - return +// GetMemoryForWriter gets the memory for writer according to the file type. +func GetMemoryForWriter(tp string, memPerCon int) int { + switch tp { + case "parquet": + return int(float64(memPerCon) * parquetWriterPercent) + default: + return int(float64(memPerCon) * otherWriterPercent) } +} - // Set a hard limit to percent, which is derived from manual testing. - percent = min(percent, 75) - - memTotal, err := tidbmemory.MemTotal() +// AdjustEncodeThreadCnt adjusts the concurrency in encode&sort step for parquet IMPORT INTO. 
+func AdjustEncodeThreadCnt(memoryPerFile, threadCnt int) int { + totalCPU := cpu.GetCPUCount() + totalMem, err := tidbmemory.MemTotal() if err != nil { - log.L().Warn("Fail to get total memory") - // Set limit to int max, which means no limiter - memTotal = math.MaxInt32 + return threadCnt } - readerMemoryLimit = int(memTotal) * percent / 100 - readerMemoryLimiter = membuf.NewLimiter(readerMemoryLimit) - - gcPercent := (10000/percent - 100) / 10 * 10 - gcPercent = max(gcPercent, 10) - gcPercent = min(gcPercent, 50) - debug.SetGCPercent(gcPercent) - - globalPool = membuf.NewPool( - membuf.WithBlockNum(readerMemoryLimit/defaultArenaSize), - membuf.WithBlockSize(defaultArenaSize), - ) - - log.L().Info("set memory limit", - zap.Int("total memory", int(memTotal)), - zap.Int("memory limit", readerMemoryLimit), - zap.Int("GC Percentage", gcPercent), - ) -} -// ReleaseMemoryForParquet releases memory allocated for parquet readers. -func ReleaseMemoryForParquet() { - lk.Lock() - defer lk.Unlock() - - importCount-- - if importCount == 0 { - globalPool.Destroy() - globalPool = nil - debug.SetGCPercent(100) - //nolint: all_revive,revive - runtime.GC() + if totalCPU <= 0 || totalMem <= 0 { + return threadCnt } + + // Use half of memory per conn for parquet parser + memForImport := int(float64(int(totalMem)/totalCPU)*parserMemoryPercent) * threadCnt + optimalThreads := memForImport / memoryPerFile + return max(1, min(optimalThreads, threadCnt)) } -// AdjustEncodeThreadCnt adjust the concurrency in encode&sort step for parquet file. -// It's used for IMPORT INTO. -func AdjustEncodeThreadCnt( - memoryUsage, threadCnt int, -) int { - memTotal, err := tidbmemory.MemTotal() - if err != nil { - return threadCnt +// Pool manages a pool of reusable byte buffers to reduce memory allocation overhead. +// It uses a buffered channel to store and reuse buffers efficiently. +type Pool struct { + blockSize int + blockCache chan []byte +} + +// GetPool gets a pool with the given capacity. 
+func GetPool(capacity int) *Pool { + mem := int(float64(capacity) * parserMemoryPercent) + return &Pool{ + blockSize: arenaSize, + blockCache: make(chan []byte, (mem+arenaSize-1)/arenaSize), } +} - return max(min(int(memTotal)*ImportIntoReaderUsage/100/memoryUsage, threadCnt), 1) +// Get retrieves a buffer from the pool or allocates a new one if the pool is empty. +func (p *Pool) Get() []byte { + select { + case buf := <-p.blockCache: + return buf + default: + return make([]byte, p.blockSize) + } } -func init() { - GetArena = getSimpleAllocator +func (p *Pool) Put(buf []byte) { + if buf == nil { + return + } + + select { + case p.blockCache <- buf: + default: + // Pool is full, discard the buffer + } } -// Get the address of a buffer, return 0 if the buffer is nil +// addressOf returns the address of a buffer, return 0 if the buffer is nil or empty. +// This is used to create unique identifiers for tracking buffer allocations. func addressOf(buf []byte) uintptr { if buf == nil || cap(buf) == 0 { return 0 @@ -130,76 +111,8 @@ func addressOf(buf []byte) uintptr { return uintptr(unsafe.Pointer(&buf[0])) } -// arena is the interface of single allocator type arena interface { allocate(int) []byte free([]byte) reset() } - -type defaultAllocator struct { - mu sync.Mutex - arenas []arena - mbufs []*membuf.Buffer - - allocatedBuf map[uintptr]int -} - -func (alloc *defaultAllocator) Allocate(size int) []byte { - alloc.mu.Lock() - defer alloc.mu.Unlock() - for i, a := range alloc.arenas { - if buf := a.allocate(size); buf != nil { - alloc.allocatedBuf[addressOf(buf)] = i - return buf - } - } - - var mbuf *membuf.Buffer - if globalPool != nil { - mbuf = globalPool.NewBuffer() - alloc.mbufs = append(alloc.mbufs, mbuf) - } - - na := GetArena(mbuf) - buf := na.allocate(size) - alloc.allocatedBuf[addressOf(buf)] = len(alloc.arenas) - alloc.arenas = append(alloc.arenas, na) - return buf -} - -func (alloc *defaultAllocator) Free(buf []byte) { - alloc.mu.Lock() - defer 
alloc.mu.Unlock() - addr := addressOf(buf) - if arenaID, ok := alloc.allocatedBuf[addr]; ok { - alloc.arenas[arenaID].free(buf) - delete(alloc.allocatedBuf, addr) - } -} - -func (alloc *defaultAllocator) Reallocate(size int, buf []byte) []byte { - alloc.Free(buf) - return alloc.Allocate(size) -} - -func (alloc *defaultAllocator) Close() { - for _, a := range alloc.arenas { - a.reset() - } - for _, mbuf := range alloc.mbufs { - mbuf.Destroy() - } - alloc.arenas = nil -} - -func (alloc *defaultAllocator) Allocated() int { - return defaultArenaSize * len(alloc.arenas) -} - -// GetAllocator get a default allocator -func GetAllocator() memory.Allocator { - return &defaultAllocator{ - allocatedBuf: make(map[uintptr]int, 32), - } -} diff --git a/pkg/lightning/mydump/allocator_test.go b/pkg/lightning/mydump/allocator_test.go index b69e6e1b41d21..83d0fb25edcfa 100644 --- a/pkg/lightning/mydump/allocator_test.go +++ b/pkg/lightning/mydump/allocator_test.go @@ -24,8 +24,10 @@ import ( ) func TestSimpleAllocator(t *testing.T) { - defaultArenaSize = 16 << 20 - a := getSimpleAllocator(nil) + arenaSize = 16 << 20 + + pool := GetPool(16 << 23) + a := NewAppendOnlyAllocator(pool) var ( lk sync.Mutex @@ -46,14 +48,14 @@ func TestSimpleAllocator(t *testing.T) { default: lk.Lock() bufSize := allocSize[rand.Intn(len(allocSize))] - buf := a.allocate(bufSize) + buf := a.Allocate(bufSize) lk.Unlock() // hold for sometimes time.Sleep(time.Millisecond) lk.Lock() - a.free(buf) + a.Free(buf) lk.Unlock() } } @@ -65,4 +67,6 @@ func TestSimpleAllocator(t *testing.T) { go allocFunc(ctx) } wg.Wait() + + a.check() } diff --git a/pkg/lightning/mydump/append_only_allocator.go b/pkg/lightning/mydump/append_only_allocator.go new file mode 100644 index 0000000000000..f014ccfbc0c6c --- /dev/null +++ b/pkg/lightning/mydump/append_only_allocator.go @@ -0,0 +1,167 @@ +// Copyright 2023 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mydump + +import ( + "sync" + "sync/atomic" +) + +type appendOnlySlice struct { + buf []byte + offset int + counter int + mu sync.Mutex +} + +func (s *appendOnlySlice) allocate(size int) []byte { + s.mu.Lock() + defer s.mu.Unlock() + + if s.offset+size <= len(s.buf) { + start := s.offset + s.offset += size + s.counter++ + return s.buf[start : start+size] + } + return nil +} + +func (s *appendOnlySlice) free() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.counter > 0 { + s.counter-- + if s.counter == 0 { + s.offset = 0 + } + } +} + +type appendOnlyAllocator struct { + pool *Pool + slices []*appendOnlySlice + slicesMutex sync.RWMutex + mapper sync.Map + + nextAllocIdx atomic.Int32 + + externalMemoryCurrent atomic.Int64 + externalMemoryMax atomic.Int64 +} + +func NewAppendOnlyAllocator(pool *Pool) *appendOnlyAllocator { + alloc := &appendOnlyAllocator{ + pool: pool, + } + for range 2 { + alloc.slices = append(alloc.slices, &appendOnlySlice{buf: pool.Get()}) + } + return alloc +} + +func (a *appendOnlyAllocator) Allocate(size int) []byte { + if size >= arenaSize { + buf := make([]byte, size) + a.mapper.Store(addressOf(buf), -1) + + // Many not accurate but enough for estimation + current := a.externalMemoryCurrent.Add(int64(size)) + if current > a.externalMemoryMax.Load() { + a.externalMemoryMax.Store(current) + } + return buf + } + + if buf := a.allocate(size); buf != nil { + return buf + } + return 
a.getAndAllocate(size) +} + +func (a *appendOnlyAllocator) allocate(size int) []byte { + a.slicesMutex.RLock() + defer a.slicesMutex.RUnlock() + + sliceLen := len(a.slices) + idx := int(a.nextAllocIdx.Add(1)) % sliceLen + for i := range sliceLen { + sel := (idx + i) % sliceLen + if buf := a.slices[sel].allocate(size); buf != nil { + a.mapper.Store(addressOf(buf), sel) + return buf + } + } + + return nil +} + +func (a *appendOnlyAllocator) getAndAllocate(size int) []byte { + a.slicesMutex.Lock() + defer a.slicesMutex.Unlock() + + newSlice := &appendOnlySlice{buf: a.pool.Get()} + buf := newSlice.allocate(size) + a.slices = append(a.slices, newSlice) + a.mapper.Store(addressOf(buf), len(a.slices)-1) + return buf +} + +func (a *appendOnlyAllocator) Free(buf []byte) { + addr := addressOf(buf) + v, ok := a.mapper.Load(addr) + if !ok { + return + } + + a.slicesMutex.RLock() + defer a.slicesMutex.RUnlock() + + id, _ := v.(int) + if id == -1 { + a.externalMemoryCurrent.Add(-int64(len(buf))) + } else { + a.slices[id].free() + } + + a.mapper.Delete(addr) +} + +func (a *appendOnlyAllocator) Reallocate(size int, buf []byte) []byte { + a.Free(buf) + return a.Allocate(size) +} + +func (a *appendOnlyAllocator) Allocated() int { + return arenaSize*len(a.slices) + int(a.externalMemoryMax.Load()) +} + +func (a *appendOnlyAllocator) Close() { + for _, s := range a.slices { + a.pool.Put(s.buf) + } +} + +func (a *appendOnlyAllocator) check() { + a.mapper.Range(func(key, value any) bool { + panic("memory leak detected in appendOnlyAllocator") + }) + for _, s := range a.slices { + if s.counter != 0 { + panic("memory leak detected in appendOnlyAllocator") + } + } +} diff --git a/pkg/lightning/mydump/simple_allocator.go b/pkg/lightning/mydump/list_allocator.go similarity index 50% rename from pkg/lightning/mydump/simple_allocator.go rename to pkg/lightning/mydump/list_allocator.go index 62586655e4a85..5b2ccdce3e4ba 100644 --- a/pkg/lightning/mydump/simple_allocator.go +++ 
b/pkg/lightning/mydump/list_allocator.go @@ -16,19 +16,21 @@ package mydump import ( "math" + "sync" - "github.com/pingcap/tidb/pkg/lightning/membuf" + "github.com/apache/arrow-go/v18/arrow/memory" "github.com/pingcap/tidb/pkg/util/intest" ) const ( - // Size of metadata of each block + // metaSize is the size of metadata for each allocated block (64 bytes for alignment) metaSize = 64 - invalid = math.MaxInt32 - // The allocated memory size will be aligned to the nearest multiple of alignSize. - // This value will be modifed in test - alignSize = 16 << 10 + // invalid represents an invalid offset/pointer in the linked list + invalid = math.MaxInt32 + + // alignSize defines the alignment boundary for memory allocation. + alignSize = 1 << 10 ) func roundUp(n, sz int) int { @@ -47,21 +49,36 @@ func readInt(buf []byte) int { } /* -simpleAllocator is a very simple allocator with low allocation efficiency -which manages allocated memory using a linked list structure. - -It is used in parquet reader and it's sufficient for our scenario -as memory allocation will not be a bottleneck. - -The memory layout is as follows: - - --------------------| - | v - ------------------------------------------------------------------------- - | | s | p | n | xxxx | | s | p | n | xxxx | | - ------------------------------------------------------------------------- - ^ | - |___________________________________| +simpleAllocator is a memory allocator that manages allocated memory using a linked list structure. +It provides basic memory allocation and deallocation with automatic merging of adjacent free blocks +to reduce fragmentation. + +While this allocator has relatively low allocation efficiency due to its linear search algorithm, +it is sufficient for parquet reader scenarios where memory allocation is not the primary bottleneck. 
+ +Memory Layout: +Each block has the following structure: +- Size (4 bytes): Size of the block including metadata +- Previous offset (4 bytes): Offset to the previous block in the free list +- Next offset (4 bytes): Offset to the next block in the free list +- Reserved (52 bytes): Reserved space for alignment +- Data: The actual allocated data follows the metadata + +The allocator maintains a linked list of free blocks: + + ┌───────────────────────────┐ + │ ▼ + ┌─────────────────────────────────────────────────────────────────┐ + │ │ s │ p │ n │ xxxx │ │ s │ p │ n │ xxxx │ │ + └─────────────────────────────────────────────────────────────────┘ + ▲ │ + └───────────────────────────────────┘ + +Where: +- s = size of block +- p = previous block offset +- n = next block offset +- xxxx = reserved space */ type simpleAllocator struct { buf []byte @@ -72,14 +89,7 @@ type simpleAllocator struct { bytesAloc int } -func getSimpleAllocator(mbuf *membuf.Buffer) arena { - var buf []byte - if mbuf != nil { - buf = mbuf.AllocBytes(defaultArenaSize) - } else { - buf = make([]byte, defaultArenaSize) - } - +func getSimpleAllocator(buf []byte) arena { a := &simpleAllocator{ buf: buf, base: int(addressOf(buf)), @@ -112,21 +122,22 @@ func (sa *simpleAllocator) getBlk(offset int) (prev, next, blkSize int) { } func (sa *simpleAllocator) insertFree(free int) { + _, _, freeSize := sa.getBlk(free) + for offset := 0; offset != invalid; { if free > offset { - _, _, blkSize := sa.getBlk(free) _, next, _ := sa.getBlk(offset) sa.setBlk(offset, -1, free, -1) sa.setBlk(free, offset, next, -1) sa.setBlk(next, free, -1, -1) - sa.bytesAloc -= blkSize + sa.bytesAloc -= freeSize return } } panic("Error insertFree") } -// Merge adjacent free blocks into one big free block to reduce fragmentation. +// merge coalesces adjacent free blocks into larger blocks to reduce fragmentation. 
func (sa *simpleAllocator) merge() { for offset := 0; offset != invalid; { _, next, blkSize := sa.getBlk(offset) @@ -185,7 +196,7 @@ func (sa *simpleAllocator) allocate(size int) []byte { func (sa *simpleAllocator) free(buf []byte) { offset := sa.getOffset(buf) - if offset < 0 || offset > len(sa.buf) { + if offset < 0 || offset >= len(sa.buf) { return } @@ -199,11 +210,6 @@ func (sa *simpleAllocator) free(buf []byte) { sa.sanityCheck() } -func (sa *simpleAllocator) reallocate(buf []byte, size int) []byte { - sa.free(buf) - return sa.allocate(size) -} - func (sa *simpleAllocator) sanityCheck() { if !intest.InTest { return @@ -216,16 +222,89 @@ func (sa *simpleAllocator) sanityCheck() { offset = next } if mem != (len(sa.buf) - 3*alignSize) { - panic("sanity check failed") + panic("sanity check failed: memory accounting mismatch") } } func (sa *simpleAllocator) reset() { + sa.blocksAlloc = 0 sa.bytesAloc = 0 - // Add dummy head and tail block to simplify the allocation logic total := len(sa.buf) sa.setBlk(0, invalid, alignSize, 0) sa.setBlk(alignSize, 0, total-alignSize, total-alignSize*3) sa.setBlk(total-alignSize, alignSize, invalid, 0) } + +// listAllocator implements memory.Allocator interface using multiple arenas. 
+type listAllocator struct { + mu sync.RWMutex + + arenas []arena + mbufs [][]byte + pool *Pool + + allocatedBuf map[uintptr]int +} + +func (alloc *listAllocator) Allocate(size int) []byte { + if size >= arenaSize { + return make([]byte, size) + } + + alloc.mu.Lock() + defer alloc.mu.Unlock() + + for i, a := range alloc.arenas { + if buf := a.allocate(size); buf != nil { + alloc.allocatedBuf[addressOf(buf)] = i + return buf + } + } + + mbuf := alloc.pool.Get() + alloc.mbufs = append(alloc.mbufs, mbuf) + + na := getSimpleAllocator(mbuf) + alloc.arenas = append(alloc.arenas, na) + + buf := na.allocate(size) + arenaIndex := len(alloc.arenas) + alloc.allocatedBuf[addressOf(buf)] = arenaIndex + + return buf +} + +func (alloc *listAllocator) Free(buf []byte) { + addr := addressOf(buf) + alloc.mu.Lock() + defer alloc.mu.Unlock() + + if arenaID, ok := alloc.allocatedBuf[addr]; ok { + alloc.arenas[arenaID].free(buf) + delete(alloc.allocatedBuf, addr) + } +} + +func (alloc *listAllocator) Reallocate(size int, buf []byte) []byte { + alloc.Free(buf) + return alloc.Allocate(size) +} + +func (alloc *listAllocator) Close() { + for _, mbuf := range alloc.mbufs { + alloc.pool.Put(mbuf) + } +} + +func (alloc *listAllocator) Allocated() int { + return arenaSize * len(alloc.arenas) +} + +// NewAllocator creates a new default allocator with the given pool. 
+func NewAllocator(pool *Pool) memory.Allocator { + return &listAllocator{ + pool: pool, + allocatedBuf: make(map[uintptr]int, 32), + } +} diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go index cec2863d6d919..1810cc3a9459b 100644 --- a/pkg/lightning/mydump/loader.go +++ b/pkg/lightning/mydump/loader.go @@ -94,6 +94,7 @@ type MDTableMeta struct { type ParquetFileMeta struct { Rows int64 // row count MemoryUsage int // memory usage for reader + MemoryPool *Pool Loc *time.Location } diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 71cdf9fce641e..a854e5820a57f 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -1194,7 +1194,7 @@ func testSampleParquetDataSize(t *testing.T, count int) { } md.WriteParquetFile(s.sourceDir, fileName, pc, count) - rowSize, _, _, err := md.SampleStatisticsFromParquet(ctx, md.SourceFileMeta{ + rowSize, _, err := md.SampleStatisticsFromParquet(ctx, md.SourceFileMeta{ Path: fileName, }, store) require.NoError(t, err) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index 9518d52ffebad..575b67f9299a6 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -28,7 +28,6 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/pkg/lightning/log" - "github.com/pingcap/tidb/pkg/lightning/membuf" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/logutil" "github.com/pingcap/tidb/pkg/util/timeutil" @@ -305,9 +304,6 @@ type ParquetParser struct { lastRow Row logger log.Logger - - memoryUsage int - memLimiter *membuf.Limiter } // Init initializes the Parquet parser and allocate necessary buffers @@ -407,10 +403,6 @@ func (pp *ParquetParser) Close() error { if a, ok := pp.alloc.(interface{ Close() }); ok { a.Close() } - - if pp.memLimiter != nil { - pp.memLimiter.Release(pp.memoryUsage) - 
} }() pp.logger.Info("[parquet parser test] Close parquet parser") @@ -541,17 +533,10 @@ func NewParquetParser( path string, meta ParquetFileMeta, ) (*ParquetParser, error) { - // Acquire memory limiter first - memoryUsage := min(meta.MemoryUsage, readerMemoryLimit) - if readerMemoryLimiter != nil { - readerMemoryLimiter.Acquire(memoryUsage) - } - logger := log.Wrap(logutil.Logger(ctx)) logger.Info("Get memory usage of parquet reader", zap.String("file", path), - zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), - zap.String("memory limit", fmt.Sprintf("%d MB", readerMemoryLimit>>20)), + zap.String("memory usage", fmt.Sprintf("%d MB", meta.MemoryUsage>>20)), ) workerPool := &errgroup.Group{} @@ -568,9 +553,15 @@ func NewParquetParser( } } - allocator := GetAllocator() + var allocator memory.Allocator + allocator = memory.NewGoAllocator() + if meta.MemoryPool != nil { + allocator = NewAppendOnlyAllocator(meta.MemoryPool) + } + prop := parquet.NewReaderProperties(allocator) prop.BufferedStreamEnabled = true + prop.BufferSize = 1024 reader, err := file.NewParquetReader(wrapper, file.WithReadProps(prop), file.WithWorkerPool(workerPool)) if err != nil { @@ -627,8 +618,6 @@ func NewParquetParser( columnNames: columnNames, alloc: allocator, logger: logger, - memoryUsage: memoryUsage, - memLimiter: readerMemoryLimiter, rowPool: &pool, } if err := parser.Init(meta.Loc); err != nil { @@ -653,7 +642,10 @@ func SampleStatisticsFromParquet( return 0, 0, err } - parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, GetDefaultParquetMeta()) + meta := GetDefaultParquetMeta() + meta.MemoryPool = GetPool(2 << 30) // use up to 2GB memory for sampling + + parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, meta) if err != nil { return 0, 0, err } @@ -689,8 +681,8 @@ func SampleStatisticsFromParquet( avgRowSize = float64(rowSize) / float64(rowCount) alloc := parser.alloc - defaultAlloc, _ := alloc.(*defaultAllocator) - memoryUsage = 
defaultAlloc.Allocated() + defaultArenaSize + a, _ := alloc.(*appendOnlyAllocator) + memoryUsage = a.Allocated() + arenaSize parser.logger.Info("Get memory usage of parquet reader", zap.String("memory usage", fmt.Sprintf("%d MB", memoryUsage>>20)), diff --git a/pkg/lightning/mydump/parquet_type_converter.go b/pkg/lightning/mydump/parquet_type_converter.go index 178dc3f132365..cdc116921ca9a 100644 --- a/pkg/lightning/mydump/parquet_type_converter.go +++ b/pkg/lightning/mydump/parquet_type_converter.go @@ -105,7 +105,10 @@ func getInt32Getter(converted *convertedType, loc *time.Location) setter[int32] mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) } - case schema.ConvertedTypes.Int32, schema.ConvertedTypes.None: + case schema.ConvertedTypes.Int32, schema.ConvertedTypes.Uint32, + schema.ConvertedTypes.Int16, schema.ConvertedTypes.Uint16, + schema.ConvertedTypes.Int8, schema.ConvertedTypes.Uint8, + schema.ConvertedTypes.None: return func(val int32, d *types.Datum) { d.SetInt64(int64(val)) } @@ -116,7 +119,10 @@ func getInt32Getter(converted *convertedType, loc *time.Location) setter[int32] func getInt64Getter(converted *convertedType, loc *time.Location) setter[int64] { switch converted.converted { - case schema.ConvertedTypes.Uint32, schema.ConvertedTypes.Uint64: + case schema.ConvertedTypes.Uint64, + schema.ConvertedTypes.Uint32, schema.ConvertedTypes.Int32, + schema.ConvertedTypes.Uint16, schema.ConvertedTypes.Int16, + schema.ConvertedTypes.Uint8, schema.ConvertedTypes.Int8: return func(val int64, d *types.Datum) { d.SetUint64(uint64(val)) } @@ -162,9 +168,13 @@ func getInt96Data(val parquet.Int96, d *types.Datum, loc *time.Location) { // --------------------------- // | nano sec | julian day | // --------------------------- - // NOTE: parquet date can be less than 1970-01-01 that is not supported by TiDB, - // where dt is a negative number but still legal in the context of Go. 
- // But it will cause errors or potential data inconsistency when importing. + // NOTE: + // INT96 is a deprecated type in parquet format to store timestamp, which consists of + // two parts: the first 8 bytes is the nanoseconds within the day, and the last 4 bytes + // is the Julian Day (days since noon on January 1, 4713 BC). And it will be converted it to UTC by + // julian day - 2440588 (Julian Day of the Unix epoch 1970-01-01 00:00:00) + // As julian day is decoded as uint32, so if user store a date before 1970-01-01, the converted time will be wrong + // and possibly to be truncated. t := val.ToTime().In(loc) mysqlTime := types.NewTime(types.FromGoTime(t), mysql.TypeTimestamp, 0) d.SetMysqlTime(mysqlTime) diff --git a/tools/gen-parquet/BUILD.bazel b/tools/gen-parquet/BUILD.bazel index 8c8b05315f20a..358a08dc19eea 100644 --- a/tools/gen-parquet/BUILD.bazel +++ b/tools/gen-parquet/BUILD.bazel @@ -6,10 +6,10 @@ go_library( importpath = "github.com/pingcap/tidb/tools/gen-parquet", visibility = ["//visibility:private"], deps = [ - "@com_github_joechenrh_arrow_go_v18//parquet", - "@com_github_joechenrh_arrow_go_v18//parquet/compress", - "@com_github_joechenrh_arrow_go_v18//parquet/file", - "@com_github_joechenrh_arrow_go_v18//parquet/schema", + "@com_github_apache_arrow_go_v18//parquet", + "@com_github_apache_arrow_go_v18//parquet/compress", + "@com_github_apache_arrow_go_v18//parquet/file", + "@com_github_apache_arrow_go_v18//parquet/schema", ], ) From 7ee9076d122d215b1011c9782e83aeb4fca339ea Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Thu, 4 Sep 2025 21:20:19 -0400 Subject: [PATCH 88/93] update go mod Signed-off-by: Ruihao Chen --- go.mod | 4 ++-- go.sum | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 6596d6d43b518..14dd1e2afb327 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/YangKeao/go-mysql-driver v0.0.0-20240627104025-dd5589458cfa 
github.com/aliyun/alibaba-cloud-sdk-go v1.61.1581 - github.com/apache/arrow-go/v18 v18.0.0-00010101000000-000000000000 + github.com/apache/arrow-go/v18 v18.0.0 github.com/apache/skywalking-eyes v0.4.0 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/ashanbrown/makezero v1.2.0 @@ -325,7 +325,7 @@ require ( ) replace ( - github.com/apache/arrow-go/v18 => github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9 + github.com/apache/arrow-go/v18 => github.com/joechenrh/arrow-go/v18 v18.0.0-20250905011811-90682f7df921 github.com/go-ldap/ldap/v3 => github.com/YangKeao/ldap/v3 v3.4.5-0.20230421065457-369a3bab1117 github.com/pingcap/tidb/pkg/parser => ./pkg/parser diff --git a/go.sum b/go.sum index f5a5f48e713cb..438cc5b022091 100644 --- a/go.sum +++ b/go.sum @@ -465,8 +465,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9 h1:Y5Y0kd8/BrtU/0H3RAET3fW+/wvEot8ClSqQV875F7c= -github.com/joechenrh/arrow-go/v18 v18.0.0-20250901051834-4df8b8d27fe9/go.mod h1:sET3C7K44egtWGG38eMpqWr2HsvrtxRq9iLSE3dXrYw= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250905011811-90682f7df921 h1:WL6Sz4ltaoSZhhID+JOmdgN08pBWXi27IJmuhnPhEbE= +github.com/joechenrh/arrow-go/v18 v18.0.0-20250905011811-90682f7df921/go.mod h1:sET3C7K44egtWGG38eMpqWr2HsvrtxRq9iLSE3dXrYw= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8= github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g= github.com/joho/sqltocsv 
v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE= From b0b24c1a7a699baf3c2c0b846b07e64cdbde8ffa Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 5 Sep 2025 09:03:59 -0400 Subject: [PATCH 89/93] Test Signed-off-by: Ruihao Chen --- pkg/lightning/backend/external/engine.go | 4 +- pkg/lightning/backend/external/iter.go | 2 +- pkg/lightning/backend/external/util.go | 21 +-- pkg/lightning/backend/external/util_test.go | 154 ++++++++++---------- pkg/lightning/backend/external/writer.go | 72 +++++---- 5 files changed, 134 insertions(+), 119 deletions(-) diff --git a/pkg/lightning/backend/external/engine.go b/pkg/lightning/backend/external/engine.go index 1a655a9fbacb2..00bf3dbaafd05 100644 --- a/pkg/lightning/backend/external/engine.go +++ b/pkg/lightning/backend/external/engine.go @@ -378,7 +378,7 @@ func (e *Engine) loadRangeBatchData(ctx context.Context, jobKeys [][]byte, outCh if err = e.lazyInitDupWriter(ctx); err != nil { return err } - deduplicatedKVs, dups, dupCount = removeDuplicates(deduplicatedKVs, getPairKey, true) + deduplicatedKVs, dups, dupCount = removeDuplicates(deduplicatedKVs, true) e.recordedDupCnt += len(dups) for _, p := range dups { e.recordedDupSize += int64(len(p.key) + len(p.value)) @@ -387,7 +387,7 @@ func (e *Engine) loadRangeBatchData(ctx context.Context, jobKeys [][]byte, outCh } } } else if e.onDup == engineapi.OnDuplicateKeyRemove { - deduplicatedKVs, _, dupCount = removeDuplicates(deduplicatedKVs, getPairKey, false) + deduplicatedKVs, _, dupCount = removeDuplicates(deduplicatedKVs, false) } deduplicateDur = time.Since(start) } diff --git a/pkg/lightning/backend/external/iter.go b/pkg/lightning/backend/external/iter.go index 143111f35bc12..1be3df9dcfa82 100644 --- a/pkg/lightning/backend/external/iter.go +++ b/pkg/lightning/backend/external/iter.go @@ -469,7 +469,7 @@ func (p *kvPair) len() int { return len(p.key) + len(p.value) } -func getPairKey(p *kvPair) []byte { +func (p kvPair) GetByte() []byte { 
return p.key } diff --git a/pkg/lightning/backend/external/util.go b/pkg/lightning/backend/external/util.go index e51bb7f8feca6..02358ad49aaf6 100644 --- a/pkg/lightning/backend/external/util.go +++ b/pkg/lightning/backend/external/util.go @@ -465,16 +465,20 @@ func SubtaskMetaPath(taskID int64, subtaskID int64) string { return path.Join(strconv.FormatInt(taskID, 10), strconv.FormatInt(subtaskID, 10), metaName) } +type elementWithGetter interface { + GetByte() []byte +} + // remove all duplicates inside sorted array in place, i.e. input elements will be changed. -func removeDuplicates[E any](in []E, keyGetter func(*E) []byte, recordRemoved bool) ([]E, []E, int) { - return doRemoveDuplicates(in, keyGetter, 0, recordRemoved) +func removeDuplicates[E elementWithGetter](in []E, recordRemoved bool) ([]E, []E, int) { + return doRemoveDuplicates(in, 0, recordRemoved) } // remove all duplicates inside sorted array in place if the duplicate count is // more than 2, and keep the first two duplicates. // we also return the total number of duplicates as the third return value. -func removeDuplicatesMoreThanTwo[E any](in []E, keyGetter func(*E) []byte) (out []E, removed []E, totalDup int) { - return doRemoveDuplicates(in, keyGetter, 2, true) +func removeDuplicatesMoreThanTwo[E elementWithGetter](in []E) (out []E, removed []E, totalDup int) { + return doRemoveDuplicates(in, 2, true) } // remove duplicates inside the sorted slice 'in', if keptDupCnt=2, we keep the @@ -482,9 +486,8 @@ func removeDuplicatesMoreThanTwo[E any](in []E, keyGetter func(*E) []byte) (out // removed duplicates are returned in 'removed' if recordRemoved=true. // we also return the total number of duplicates, either it's removed or not, as // the third return value. 
-func doRemoveDuplicates[E any]( +func doRemoveDuplicates[E elementWithGetter]( in []E, - keyGetter func(*E) []byte, keptDupCnt int, recordRemoved bool, ) (out []E, removed []E, totalDup int) { @@ -493,15 +496,15 @@ func doRemoveDuplicates[E any]( return in, []E{}, 0 } pivotIdx, fillIdx := 0, 0 - pivot := keyGetter(&in[pivotIdx]) + pivot := in[pivotIdx].GetByte() if recordRemoved { removed = make([]E, 0, 2) } for idx := 1; idx <= len(in); idx++ { var key []byte if idx < len(in) { - key = keyGetter(&in[idx]) - if bytes.Compare(pivot, key) == 0 { + key = in[idx].GetByte() + if bytes.Equal(pivot, key) { continue } } diff --git a/pkg/lightning/backend/external/util_test.go b/pkg/lightning/backend/external/util_test.go index 809015681b300..ad3a3f2d30593 100644 --- a/pkg/lightning/backend/external/util_test.go +++ b/pkg/lightning/backend/external/util_test.go @@ -512,46 +512,49 @@ func TestExternalMetaPath(t *testing.T) { require.Equal(t, "2/3/meta.json", SubtaskMetaPath(2, 3)) } +type intV int + +func (i intV) GetByte() []byte { + return []byte{byte(i)} +} + func TestRemoveDuplicates(t *testing.T) { - valGetter := func(e *int) []byte { - return []byte{byte(*e)} - } cases := []struct { - in []int - out []int - dups []int + in []intV + out []intV + dups []intV }{ // no duplicates - {in: []int{}, out: []int{}, dups: []int{}}, - {in: []int{1}, out: []int{1}, dups: []int{}}, - {in: []int{1, 2}, out: []int{1, 2}, dups: []int{}}, - {in: []int{1, 2, 3}, out: []int{1, 2, 3}, dups: []int{}}, - {in: []int{1, 2, 3, 4, 5}, out: []int{1, 2, 3, 4, 5}, dups: []int{}}, + {in: []intV{}, out: []intV{}, dups: []intV{}}, + {in: []intV{1}, out: []intV{1}, dups: []intV{}}, + {in: []intV{1, 2}, out: []intV{1, 2}, dups: []intV{}}, + {in: []intV{1, 2, 3}, out: []intV{1, 2, 3}, dups: []intV{}}, + {in: []intV{1, 2, 3, 4, 5}, out: []intV{1, 2, 3, 4, 5}, dups: []intV{}}, // duplicates at beginning - {in: []int{1, 1}, out: []int{}, dups: []int{1, 1}}, - {in: []int{1, 1, 1}, out: []int{}, dups: 
[]int{1, 1, 1}}, - {in: []int{1, 1, 2, 3}, out: []int{2, 3}, dups: []int{1, 1}}, - {in: []int{1, 1, 1, 2, 3}, out: []int{2, 3}, dups: []int{1, 1, 1}}, + {in: []intV{1, 1}, out: []intV{}, dups: []intV{1, 1}}, + {in: []intV{1, 1, 1}, out: []intV{}, dups: []intV{1, 1, 1}}, + {in: []intV{1, 1, 2, 3}, out: []intV{2, 3}, dups: []intV{1, 1}}, + {in: []intV{1, 1, 1, 2, 3}, out: []intV{2, 3}, dups: []intV{1, 1, 1}}, // duplicates in middle - {in: []int{1, 2, 2, 3}, out: []int{1, 3}, dups: []int{2, 2}}, - {in: []int{1, 2, 2, 2, 3}, out: []int{1, 3}, dups: []int{2, 2, 2}}, - {in: []int{1, 2, 2, 2, 3, 3, 4}, out: []int{1, 4}, dups: []int{2, 2, 2, 3, 3}}, - {in: []int{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []int{1, 5}, dups: []int{2, 2, 2, 3, 3, 4, 4}}, - {in: []int{1, 2, 2, 2, 3, 4, 4, 5}, out: []int{1, 3, 5}, dups: []int{2, 2, 2, 4, 4}}, - {in: []int{1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, out: []int{1, 3, 6, 7, 9}, dups: []int{2, 2, 2, 4, 4, 5, 5, 8, 8}}, + {in: []intV{1, 2, 2, 3}, out: []intV{1, 3}, dups: []intV{2, 2}}, + {in: []intV{1, 2, 2, 2, 3}, out: []intV{1, 3}, dups: []intV{2, 2, 2}}, + {in: []intV{1, 2, 2, 2, 3, 3, 4}, out: []intV{1, 4}, dups: []intV{2, 2, 2, 3, 3}}, + {in: []intV{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []intV{1, 5}, dups: []intV{2, 2, 2, 3, 3, 4, 4}}, + {in: []intV{1, 2, 2, 2, 3, 4, 4, 5}, out: []intV{1, 3, 5}, dups: []intV{2, 2, 2, 4, 4}}, + {in: []intV{1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, out: []intV{1, 3, 6, 7, 9}, dups: []intV{2, 2, 2, 4, 4, 5, 5, 8, 8}}, // duplicates at end - {in: []int{1, 2, 3, 3}, out: []int{1, 2}, dups: []int{3, 3}}, - {in: []int{1, 2, 3, 3, 3}, out: []int{1, 2}, dups: []int{3, 3, 3}}, + {in: []intV{1, 2, 3, 3}, out: []intV{1, 2}, dups: []intV{3, 3}}, + {in: []intV{1, 2, 3, 3, 3}, out: []intV{1, 2}, dups: []intV{3, 3, 3}}, // mixing - {in: []int{1, 1, 2, 3, 3, 4}, out: []int{2, 4}, dups: []int{1, 1, 3, 3}}, - {in: []int{1, 2, 3, 3, 4, 4}, out: []int{1, 2}, dups: []int{3, 3, 4, 4}}, - {in: []int{1, 1, 2, 3, 4, 4}, out: []int{2, 
3}, dups: []int{1, 1, 4, 4}}, - {in: []int{1, 1, 2, 2, 3, 3}, out: []int{}, dups: []int{1, 1, 2, 2, 3, 3}}, - {in: []int{1, 1, 2, 2, 2, 3, 3}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3}}, - {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3, 4, 4}}, - {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}}, - {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5}, out: []int{3}, dups: []int{1, 1, 2, 2, 2, 4, 4, 5, 5}}, - {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, out: []int{3, 6, 7}, dups: []int{1, 1, 2, 2, 2, 4, 4, 5, 5, 8, 8, 9, 9}}, + {in: []intV{1, 1, 2, 3, 3, 4}, out: []intV{2, 4}, dups: []intV{1, 1, 3, 3}}, + {in: []intV{1, 2, 3, 3, 4, 4}, out: []intV{1, 2}, dups: []intV{3, 3, 4, 4}}, + {in: []intV{1, 1, 2, 3, 4, 4}, out: []intV{2, 3}, dups: []intV{1, 1, 4, 4}}, + {in: []intV{1, 1, 2, 2, 3, 3}, out: []intV{}, dups: []intV{1, 1, 2, 2, 3, 3}}, + {in: []intV{1, 1, 2, 2, 2, 3, 3}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3}}, + {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4}}, + {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}}, + {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5}, out: []intV{3}, dups: []intV{1, 1, 2, 2, 2, 4, 4, 5, 5}}, + {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, out: []intV{3, 6, 7}, dups: []intV{1, 1, 2, 2, 2, 4, 4, 5, 5, 8, 8, 9, 9}}, } for i, c := range cases { @@ -560,16 +563,16 @@ func TestRemoveDuplicates(t *testing.T) { require.True(t, slices.IsSorted(c.out)) require.True(t, slices.IsSorted(c.dups)) require.Equal(t, len(c.dups), len(c.in)-len(c.out)) - tmpIn := make([]int, len(c.in)) + tmpIn := make([]intV, len(c.in)) copy(tmpIn, c.in) - out, dups, dupCnt := removeDuplicates(tmpIn, valGetter, true) + out, dups, dupCnt := removeDuplicates(tmpIn, true) require.EqualValues(t, c.out, out) require.EqualValues(t, c.dups, dups) 
require.Equal(t, dupCnt, len(dups)) - tmpIn = make([]int, len(c.in)) + tmpIn = make([]intV, len(c.in)) copy(tmpIn, c.in) - out, dups, dupCnt = removeDuplicates(tmpIn, valGetter, false) + out, dups, dupCnt = removeDuplicates(tmpIn, false) require.EqualValues(t, c.out, out) require.Empty(t, dups) require.Equal(t, dupCnt, len(c.dups)) @@ -578,53 +581,50 @@ func TestRemoveDuplicates(t *testing.T) { } func TestRemoveDuplicatesMoreThan2(t *testing.T) { - valGetter := func(e *int) []byte { - return []byte{byte(*e)} - } cases := []struct { - in []int - out []int - dups []int + in []intV + out []intV + dups []intV total int }{ // no duplicates - {in: []int{}, out: []int{}, dups: []int{}, total: 0}, - {in: []int{1}, out: []int{1}, dups: []int{}, total: 0}, - {in: []int{1, 2}, out: []int{1, 2}, dups: []int{}, total: 0}, - {in: []int{1, 2, 3}, out: []int{1, 2, 3}, dups: []int{}, total: 0}, - {in: []int{1, 2, 3, 4, 5}, out: []int{1, 2, 3, 4, 5}, dups: []int{}, total: 0}, + {in: []intV{}, out: []intV{}, dups: []intV{}, total: 0}, + {in: []intV{1}, out: []intV{1}, dups: []intV{}, total: 0}, + {in: []intV{1, 2}, out: []intV{1, 2}, dups: []intV{}, total: 0}, + {in: []intV{1, 2, 3}, out: []intV{1, 2, 3}, dups: []intV{}, total: 0}, + {in: []intV{1, 2, 3, 4, 5}, out: []intV{1, 2, 3, 4, 5}, dups: []intV{}, total: 0}, // duplicates at beginning - {in: []int{1, 1}, out: []int{1, 1}, dups: []int{}, total: 2}, - {in: []int{1, 1, 1}, out: []int{1, 1}, dups: []int{1}, total: 3}, - {in: []int{1, 1, 1, 1}, out: []int{1, 1}, dups: []int{1, 1}, total: 4}, - {in: []int{1, 1, 1, 1, 1}, out: []int{1, 1}, dups: []int{1, 1, 1}, total: 5}, - {in: []int{1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{}, total: 2}, - {in: []int{1, 1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{1}, total: 3}, - {in: []int{1, 1, 1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{1, 1}, total: 4}, + {in: []intV{1, 1}, out: []intV{1, 1}, dups: []intV{}, total: 2}, + {in: []intV{1, 1, 1}, out: []intV{1, 1}, dups: []intV{1}, 
total: 3}, + {in: []intV{1, 1, 1, 1}, out: []intV{1, 1}, dups: []intV{1, 1}, total: 4}, + {in: []intV{1, 1, 1, 1, 1}, out: []intV{1, 1}, dups: []intV{1, 1, 1}, total: 5}, + {in: []intV{1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{}, total: 2}, + {in: []intV{1, 1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{1}, total: 3}, + {in: []intV{1, 1, 1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{1, 1}, total: 4}, // duplicates in middle - {in: []int{1, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{}, total: 2}, - {in: []int{1, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2}, total: 3}, - {in: []int{1, 2, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2, 2}, total: 4}, - {in: []int{1, 2, 2, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2, 2, 2}, total: 5}, - {in: []int{1, 2, 2, 2, 3, 3, 4}, out: []int{1, 2, 2, 3, 3, 4}, dups: []int{2}, total: 5}, - {in: []int{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []int{1, 2, 2, 3, 3, 4, 4, 5}, dups: []int{2}, total: 7}, - {in: []int{1, 2, 2, 2, 3, 4, 4, 5}, out: []int{1, 2, 2, 3, 4, 4, 5}, dups: []int{2}, total: 5}, - {in: []int{1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9}, out: []int{1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, dups: []int{2, 5}, total: 10}, + {in: []intV{1, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{}, total: 2}, + {in: []intV{1, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2}, total: 3}, + {in: []intV{1, 2, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2, 2}, total: 4}, + {in: []intV{1, 2, 2, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2, 2, 2}, total: 5}, + {in: []intV{1, 2, 2, 2, 3, 3, 4}, out: []intV{1, 2, 2, 3, 3, 4}, dups: []intV{2}, total: 5}, + {in: []intV{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []intV{1, 2, 2, 3, 3, 4, 4, 5}, dups: []intV{2}, total: 7}, + {in: []intV{1, 2, 2, 2, 3, 4, 4, 5}, out: []intV{1, 2, 2, 3, 4, 4, 5}, dups: []intV{2}, total: 5}, + {in: []intV{1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9}, out: []intV{1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, dups: []intV{2, 5}, 
total: 10}, // duplicates at end - {in: []int{1, 2, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{}, total: 2}, - {in: []int{1, 2, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3}, total: 3}, - {in: []int{1, 2, 3, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3, 3}, total: 4}, - {in: []int{1, 2, 3, 3, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3, 3, 3}, total: 5}, + {in: []intV{1, 2, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{}, total: 2}, + {in: []intV{1, 2, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3}, total: 3}, + {in: []intV{1, 2, 3, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3, 3}, total: 4}, + {in: []intV{1, 2, 3, 3, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3, 3, 3}, total: 5}, // mixing - {in: []int{1, 1, 1, 1, 1, 2, 3, 3, 3, 4}, out: []int{1, 1, 2, 3, 3, 4}, dups: []int{1, 1, 1, 3}, total: 8}, - {in: []int{1, 2, 3, 3, 3, 4, 4, 4}, out: []int{1, 2, 3, 3, 4, 4}, dups: []int{3, 4}, total: 6}, - {in: []int{1, 1, 1, 2, 3, 4, 4, 4}, out: []int{1, 1, 2, 3, 4, 4}, dups: []int{1, 4}, total: 6}, - {in: []int{1, 1, 1, 2, 2, 2, 3, 3, 3}, out: []int{1, 1, 2, 2, 3, 3}, dups: []int{1, 2, 3}, total: 9}, - {in: []int{1, 1, 2, 2, 2, 3, 3}, out: []int{1, 1, 2, 2, 3, 3}, dups: []int{2}, total: 7}, - {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 4}, out: []int{1, 1, 2, 2, 3, 3, 4, 4}, dups: []int{2, 4}, total: 10}, - {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5}, out: []int{1, 1, 2, 2, 3, 3, 4, 4, 5, 5}, dups: []int{2, 4}, total: 12}, - {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5}, out: []int{1, 1, 2, 2, 3, 4, 4, 5, 5}, dups: []int{2, 4, 5}, total: 11}, - {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9, 9}, out: []int{1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, dups: []int{2, 5}, total: 14}, + {in: []intV{1, 1, 1, 1, 1, 2, 3, 3, 3, 4}, out: []intV{1, 1, 2, 3, 3, 4}, dups: []intV{1, 1, 1, 3}, total: 8}, + {in: []intV{1, 2, 3, 3, 3, 4, 4, 4}, out: []intV{1, 2, 3, 3, 4, 4}, dups: []intV{3, 4}, total: 6}, + {in: []intV{1, 1, 1, 2, 3, 4, 4, 4}, out: 
[]intV{1, 1, 2, 3, 4, 4}, dups: []intV{1, 4}, total: 6}, + {in: []intV{1, 1, 1, 2, 2, 2, 3, 3, 3}, out: []intV{1, 1, 2, 2, 3, 3}, dups: []intV{1, 2, 3}, total: 9}, + {in: []intV{1, 1, 2, 2, 2, 3, 3}, out: []intV{1, 1, 2, 2, 3, 3}, dups: []intV{2}, total: 7}, + {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 4}, out: []intV{1, 1, 2, 2, 3, 3, 4, 4}, dups: []intV{2, 4}, total: 10}, + {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5}, out: []intV{1, 1, 2, 2, 3, 3, 4, 4, 5, 5}, dups: []intV{2, 4}, total: 12}, + {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5}, out: []intV{1, 1, 2, 2, 3, 4, 4, 5, 5}, dups: []intV{2, 4, 5}, total: 11}, + {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9, 9}, out: []intV{1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, dups: []intV{2, 5}, total: 14}, } for i, c := range cases { @@ -633,9 +633,9 @@ func TestRemoveDuplicatesMoreThan2(t *testing.T) { require.True(t, slices.IsSorted(c.out)) require.True(t, slices.IsSorted(c.dups)) require.Equal(t, len(c.dups), len(c.in)-len(c.out)) - tmpIn := make([]int, len(c.in)) + tmpIn := make([]intV, len(c.in)) copy(tmpIn, c.in) - out, dups, totalDup := removeDuplicatesMoreThanTwo(tmpIn, valGetter) + out, dups, totalDup := removeDuplicatesMoreThanTwo(tmpIn) require.EqualValues(t, c.out, out) require.EqualValues(t, c.dups, dups) require.Equal(t, c.total, totalDup) diff --git a/pkg/lightning/backend/external/writer.go b/pkg/lightning/backend/external/writer.go index 7510e3576baff..0f16a8a7c1130 100644 --- a/pkg/lightning/backend/external/writer.go +++ b/pkg/lightning/backend/external/writer.go @@ -410,6 +410,15 @@ func GetMaxOverlappingTotal(stats []MultipleFilesStat) int64 { return GetMaxOverlapping(points) } +type locationWithBuf struct { + loc membuf.SliceLocation + key []byte +} + +func (l *locationWithBuf) GetByte() []byte { + return l.key +} + // Writer is used to write data into external storage. 
type Writer struct { store storage.ExternalStorage @@ -422,9 +431,9 @@ type Writer struct { memSizeLimit uint64 - kvBuffer *membuf.Buffer - kvLocations []membuf.SliceLocation - kvSize int64 + kvBuffer *membuf.Buffer + locationWithBuf []*locationWithBuf + kvSize int64 onClose OnCloseFunc onDup engineapi.OnDuplicateKey @@ -473,7 +482,10 @@ func (w *Writer) WriteRow(ctx context.Context, key, val []byte, handle tidbkv.Ha copy(dataBuf[2*lengthBytes:], key) copy(dataBuf[2*lengthBytes+keyLen:], val) - w.kvLocations = append(w.kvLocations, loc) + w.locationWithBuf = append(w.locationWithBuf, &locationWithBuf{ + loc: loc, + key: dataBuf[2*lengthBytes : 2*lengthBytes+keyLen], + }) // TODO: maybe we can unify the size calculation during write to store. w.kvSize += int64(keyLen + len(val)) w.batchSize += uint64(length) @@ -501,11 +513,11 @@ func (w *Writer) Close(ctx context.Context) error { logutil.Logger(ctx).Info("close writer", zap.String("writerID", w.writerID), - zap.Int("kv-cnt-cap", cap(w.kvLocations)), + zap.Int("kv-cnt-cap", cap(w.locationWithBuf)), zap.String("minKey", hex.EncodeToString(w.minKey)), zap.String("maxKey", hex.EncodeToString(w.maxKey))) - w.kvLocations = nil + w.locationWithBuf = nil w.onClose(&WriterSummary{ WriterID: w.writerID, GroupOffset: w.groupOffset, @@ -533,7 +545,7 @@ func (w *Writer) recordMinMax(newMin, newMax tidbkv.Key, size uint64) { const flushKVsRetryTimes = 3 func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { - if len(w.kvLocations) == 0 { + if len(w.locationWithBuf) == 0 { return nil } @@ -544,13 +556,13 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { sortStart := time.Now() var ( dupFound bool - dupLoc *membuf.SliceLocation + dupLoc *locationWithBuf ) - slices.SortFunc(w.kvLocations, func(i, j membuf.SliceLocation) int { - res := bytes.Compare(w.getKeyByLoc(&i), w.getKeyByLoc(&j)) + slices.SortFunc(w.locationWithBuf, func(i, j *locationWithBuf) int { + res := 
bytes.Compare(i.key, j.key) if res == 0 && !dupFound { dupFound = true - dupLoc = &i + dupLoc = i } return res }) @@ -558,9 +570,9 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { metrics.GlobalSortWriteToCloudStorageDuration.WithLabelValues("sort").Observe(sortDuration.Seconds()) metrics.GlobalSortWriteToCloudStorageRate.WithLabelValues("sort").Observe(float64(w.batchSize) / 1024.0 / 1024.0 / sortDuration.Seconds()) - batchKVCnt := len(w.kvLocations) + batchKVCnt := len(w.locationWithBuf) var ( - dupLocs []membuf.SliceLocation + dupLocs []*locationWithBuf dupCnt int ) if dupFound { @@ -569,14 +581,14 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { case engineapi.OnDuplicateKeyRecord: // we don't have a global view, so need to keep duplicates with duplicate // count <= 2, so later we can find them. - w.kvLocations, dupLocs, dupCnt = removeDuplicatesMoreThanTwo(w.kvLocations, w.getKeyByLoc) + w.locationWithBuf, dupLocs, dupCnt = removeDuplicatesMoreThanTwo(w.locationWithBuf) w.kvSize = w.reCalculateKVSize() case engineapi.OnDuplicateKeyRemove: - w.kvLocations, _, dupCnt = removeDuplicates(w.kvLocations, w.getKeyByLoc, false) + w.locationWithBuf, _, dupCnt = removeDuplicates(w.locationWithBuf, false) w.kvSize = w.reCalculateKVSize() case engineapi.OnDuplicateKeyError: - dupKey := slices.Clone(w.getKeyByLoc(dupLoc)) - dupValue := slices.Clone(w.getValueByLoc(dupLoc)) + dupKey := slices.Clone(w.getKeyByLoc(&dupLoc.loc)) + dupValue := slices.Clone(w.getValueByLoc(&dupLoc.loc)) return common.ErrFoundDuplicateKeys.FastGenByArgs(dupKey, dupValue) } } @@ -585,7 +597,7 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { var dataFile, statFile, dupFile string // due to current semantic of OnDuplicateKeyRecord, if len(w.kvLocations) = 0, // len(dupLocs) is also 0 - if len(w.kvLocations) > 0 { + if len(w.locationWithBuf) > 0 { for i := range flushKVsRetryTimes { dataFile, statFile, 
dupFile, err = w.flushSortedKVs(ctx, dupLocs) if err == nil || ctx.Err() != nil { @@ -617,10 +629,10 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { metrics.GlobalSortWriteToCloudStorageRate.WithLabelValues("sort_and_write").Observe(float64(w.batchSize) / 1024.0 / 1024.0 / totalDuration.Seconds()) // maintain 500-batch statistics - if len(w.kvLocations) > 0 { - w.totalCnt += uint64(len(w.kvLocations)) + if len(w.locationWithBuf) > 0 { + w.totalCnt += uint64(len(w.locationWithBuf)) - minKey, maxKey := w.getKeyByLoc(&w.kvLocations[0]), w.getKeyByLoc(&w.kvLocations[len(w.kvLocations)-1]) + minKey, maxKey := w.locationWithBuf[0].key, w.locationWithBuf[len(w.locationWithBuf)-1].key w.recordMinMax(minKey, maxKey, uint64(w.kvSize)) w.addNewKVFile2MultiFileStats(dataFile, statFile, minKey, maxKey) @@ -637,7 +649,7 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { }) } - w.kvLocations = w.kvLocations[:0] + w.locationWithBuf = w.locationWithBuf[:0] w.kvSize = 0 w.kvBuffer.Reset() w.batchSize = 0 @@ -667,7 +679,7 @@ func (w *Writer) addNewKVFile2MultiFileStats(dataFile, statFile string, minKey, w.fileMaxKeys = append(w.fileMaxKeys, tidbkv.Key(maxKey).Clone()) } -func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []membuf.SliceLocation) (string, string, string, error) { +func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []*locationWithBuf) (string, string, string, error) { logger := logutil.Logger(ctx).With( zap.String("writer-id", w.writerID), zap.Int("sequence-number", w.currentSeq), @@ -690,8 +702,8 @@ func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []membuf.SliceLocat w.rc.reset() kvStore := NewKeyValueStore(ctx, dataWriter, w.rc) - for _, pair := range w.kvLocations { - err = kvStore.addEncodedData(w.kvBuffer.GetSlice(&pair)) + for _, pair := range w.locationWithBuf { + err = kvStore.addEncodedData(w.kvBuffer.GetSlice(&pair.loc)) if err != nil { return "", "", "", err } @@ 
-736,7 +748,7 @@ func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []membuf.SliceLocat return dataFile, statFile, dupPath, nil } -func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []membuf.SliceLocation) (string, error) { +func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []*locationWithBuf) (string, error) { dupPath, dupWriter, err := w.createDupWriter(ctx) if err != nil { return "", err @@ -749,8 +761,8 @@ func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []membuf.SliceLocation) } }() dupStore := NewKeyValueStore(ctx, dupWriter, nil) - for _, pair := range kvLocs { - err = dupStore.addEncodedData(w.kvBuffer.GetSlice(&pair)) + for _, l := range kvLocs { + err = dupStore.addEncodedData(w.kvBuffer.GetSlice(&l.loc)) if err != nil { return "", err } @@ -778,8 +790,8 @@ func (w *Writer) getValueByLoc(loc *membuf.SliceLocation) []byte { func (w *Writer) reCalculateKVSize() int64 { s := int64(0) - for _, loc := range w.kvLocations { - s += int64(loc.Length) - 2*lengthBytes + for _, loc := range w.locationWithBuf { + s += int64(loc.loc.Length) - 2*lengthBytes } return s } From 8b3378bb876427d420aafeb14399330d12359ef9 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sat, 6 Sep 2025 01:07:52 -0400 Subject: [PATCH 90/93] fix after merge Signed-off-by: Ruihao Chen --- DEPS.bzl | 12 ++++++------ pkg/executor/importer/BUILD.bazel | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/DEPS.bzl b/DEPS.bzl index beb5377441ba8..80b30fb93e2c5 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -346,13 +346,13 @@ def go_deps(): name = "com_github_apache_arrow_go_v18", build_file_proto_mode = "disable_global", importpath = "github.com/apache/arrow-go/v18", - sha256 = "af71f087f5777b1d33c7c365780d92ede1301925bcb31b408929ce460678ee4a", - strip_prefix = "github.com/joechenrh/arrow-go/v18@v18.0.0-20250901051834-4df8b8d27fe9", + sha256 = "cca2429a67d41709582a7df5d7249e5c7c95b0eb2a54aaabab90f1c002bce5ea", + strip_prefix = 
"github.com/joechenrh/arrow-go/v18@v18.0.0-20250905011811-90682f7df921", urls = [ - "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", - "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", - "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", - "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250901051834-4df8b8d27fe9.zip", + "http://bazel-cache.pingcap.net:8080/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250905011811-90682f7df921.zip", + "http://ats.apps.svc/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250905011811-90682f7df921.zip", + "https://cache.hawkingrei.com/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250905011811-90682f7df921.zip", + "https://storage.googleapis.com/pingcapmirror/gomod/github.com/joechenrh/arrow-go/v18/com_github_joechenrh_arrow_go_v18-v18.0.0-20250905011811-90682f7df921.zip", ], ) go_repository( diff --git a/pkg/executor/importer/BUILD.bazel b/pkg/executor/importer/BUILD.bazel index 4c6d63ebac305..ca30ef450cc8d 100644 --- a/pkg/executor/importer/BUILD.bazel +++ b/pkg/executor/importer/BUILD.bazel @@ -78,7 +78,6 @@ go_library( "//pkg/util/sqlkiller", "//pkg/util/stringutil", "//pkg/util/syncutil", - "//pkg/util/timeutil", "@com_github_docker_go_units//:go-units", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", From 6c447f9ee62967de772c8ea92dc3c15b734a3c6c Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sat, 6 Sep 2025 01:10:09 -0400 Subject: [PATCH 91/93] Revert "Test" This reverts commit b0b24c1a7a699baf3c2c0b846b07e64cdbde8ffa. 
--- pkg/lightning/backend/external/engine.go | 4 +- pkg/lightning/backend/external/iter.go | 2 +- pkg/lightning/backend/external/util.go | 21 ++- pkg/lightning/backend/external/util_test.go | 154 ++++++++++---------- pkg/lightning/backend/external/writer.go | 72 ++++----- 5 files changed, 119 insertions(+), 134 deletions(-) diff --git a/pkg/lightning/backend/external/engine.go b/pkg/lightning/backend/external/engine.go index 00bf3dbaafd05..1a655a9fbacb2 100644 --- a/pkg/lightning/backend/external/engine.go +++ b/pkg/lightning/backend/external/engine.go @@ -378,7 +378,7 @@ func (e *Engine) loadRangeBatchData(ctx context.Context, jobKeys [][]byte, outCh if err = e.lazyInitDupWriter(ctx); err != nil { return err } - deduplicatedKVs, dups, dupCount = removeDuplicates(deduplicatedKVs, true) + deduplicatedKVs, dups, dupCount = removeDuplicates(deduplicatedKVs, getPairKey, true) e.recordedDupCnt += len(dups) for _, p := range dups { e.recordedDupSize += int64(len(p.key) + len(p.value)) @@ -387,7 +387,7 @@ func (e *Engine) loadRangeBatchData(ctx context.Context, jobKeys [][]byte, outCh } } } else if e.onDup == engineapi.OnDuplicateKeyRemove { - deduplicatedKVs, _, dupCount = removeDuplicates(deduplicatedKVs, false) + deduplicatedKVs, _, dupCount = removeDuplicates(deduplicatedKVs, getPairKey, false) } deduplicateDur = time.Since(start) } diff --git a/pkg/lightning/backend/external/iter.go b/pkg/lightning/backend/external/iter.go index 36b189a1b66bb..fb0ed5b7f7e27 100644 --- a/pkg/lightning/backend/external/iter.go +++ b/pkg/lightning/backend/external/iter.go @@ -469,7 +469,7 @@ func (p *kvPair) len() int { return len(p.key) + len(p.value) } -func (p kvPair) GetByte() []byte { +func getPairKey(p *kvPair) []byte { return p.key } diff --git a/pkg/lightning/backend/external/util.go b/pkg/lightning/backend/external/util.go index 04f6780d70b94..9e4dc937b6462 100644 --- a/pkg/lightning/backend/external/util.go +++ b/pkg/lightning/backend/external/util.go @@ -482,20 +482,16 @@ 
func SubtaskMetaPath(taskID int64, subtaskID int64) string { return path.Join(strconv.FormatInt(taskID, 10), strconv.FormatInt(subtaskID, 10), metaName) } -type elementWithGetter interface { - GetByte() []byte -} - // remove all duplicates inside sorted array in place, i.e. input elements will be changed. -func removeDuplicates[E elementWithGetter](in []E, recordRemoved bool) ([]E, []E, int) { - return doRemoveDuplicates(in, 0, recordRemoved) +func removeDuplicates[E any](in []E, keyGetter func(*E) []byte, recordRemoved bool) ([]E, []E, int) { + return doRemoveDuplicates(in, keyGetter, 0, recordRemoved) } // remove all duplicates inside sorted array in place if the duplicate count is // more than 2, and keep the first two duplicates. // we also return the total number of duplicates as the third return value. -func removeDuplicatesMoreThanTwo[E elementWithGetter](in []E) (out []E, removed []E, totalDup int) { - return doRemoveDuplicates(in, 2, true) +func removeDuplicatesMoreThanTwo[E any](in []E, keyGetter func(*E) []byte) (out []E, removed []E, totalDup int) { + return doRemoveDuplicates(in, keyGetter, 2, true) } // remove duplicates inside the sorted slice 'in', if keptDupCnt=2, we keep the @@ -503,8 +499,9 @@ func removeDuplicatesMoreThanTwo[E elementWithGetter](in []E) (out []E, removed // removed duplicates are returned in 'removed' if recordRemoved=true. // we also return the total number of duplicates, either it's removed or not, as // the third return value. 
-func doRemoveDuplicates[E elementWithGetter]( +func doRemoveDuplicates[E any]( in []E, + keyGetter func(*E) []byte, keptDupCnt int, recordRemoved bool, ) (out []E, removed []E, totalDup int) { @@ -513,15 +510,15 @@ func doRemoveDuplicates[E elementWithGetter]( return in, []E{}, 0 } pivotIdx, fillIdx := 0, 0 - pivot := in[pivotIdx].GetByte() + pivot := keyGetter(&in[pivotIdx]) if recordRemoved { removed = make([]E, 0, 2) } for idx := 1; idx <= len(in); idx++ { var key []byte if idx < len(in) { - key = in[idx].GetByte() - if bytes.Equal(pivot, key) { + key = keyGetter(&in[idx]) + if bytes.Compare(pivot, key) == 0 { continue } } diff --git a/pkg/lightning/backend/external/util_test.go b/pkg/lightning/backend/external/util_test.go index f097cf3ccae0c..2374d02c63725 100644 --- a/pkg/lightning/backend/external/util_test.go +++ b/pkg/lightning/backend/external/util_test.go @@ -509,49 +509,46 @@ func TestExternalMetaPath(t *testing.T) { require.Equal(t, "2/3/meta.json", SubtaskMetaPath(2, 3)) } -type intV int - -func (i intV) GetByte() []byte { - return []byte{byte(i)} -} - func TestRemoveDuplicates(t *testing.T) { + valGetter := func(e *int) []byte { + return []byte{byte(*e)} + } cases := []struct { - in []intV - out []intV - dups []intV + in []int + out []int + dups []int }{ // no duplicates - {in: []intV{}, out: []intV{}, dups: []intV{}}, - {in: []intV{1}, out: []intV{1}, dups: []intV{}}, - {in: []intV{1, 2}, out: []intV{1, 2}, dups: []intV{}}, - {in: []intV{1, 2, 3}, out: []intV{1, 2, 3}, dups: []intV{}}, - {in: []intV{1, 2, 3, 4, 5}, out: []intV{1, 2, 3, 4, 5}, dups: []intV{}}, + {in: []int{}, out: []int{}, dups: []int{}}, + {in: []int{1}, out: []int{1}, dups: []int{}}, + {in: []int{1, 2}, out: []int{1, 2}, dups: []int{}}, + {in: []int{1, 2, 3}, out: []int{1, 2, 3}, dups: []int{}}, + {in: []int{1, 2, 3, 4, 5}, out: []int{1, 2, 3, 4, 5}, dups: []int{}}, // duplicates at beginning - {in: []intV{1, 1}, out: []intV{}, dups: []intV{1, 1}}, - {in: []intV{1, 1, 1}, out: 
[]intV{}, dups: []intV{1, 1, 1}}, - {in: []intV{1, 1, 2, 3}, out: []intV{2, 3}, dups: []intV{1, 1}}, - {in: []intV{1, 1, 1, 2, 3}, out: []intV{2, 3}, dups: []intV{1, 1, 1}}, + {in: []int{1, 1}, out: []int{}, dups: []int{1, 1}}, + {in: []int{1, 1, 1}, out: []int{}, dups: []int{1, 1, 1}}, + {in: []int{1, 1, 2, 3}, out: []int{2, 3}, dups: []int{1, 1}}, + {in: []int{1, 1, 1, 2, 3}, out: []int{2, 3}, dups: []int{1, 1, 1}}, // duplicates in middle - {in: []intV{1, 2, 2, 3}, out: []intV{1, 3}, dups: []intV{2, 2}}, - {in: []intV{1, 2, 2, 2, 3}, out: []intV{1, 3}, dups: []intV{2, 2, 2}}, - {in: []intV{1, 2, 2, 2, 3, 3, 4}, out: []intV{1, 4}, dups: []intV{2, 2, 2, 3, 3}}, - {in: []intV{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []intV{1, 5}, dups: []intV{2, 2, 2, 3, 3, 4, 4}}, - {in: []intV{1, 2, 2, 2, 3, 4, 4, 5}, out: []intV{1, 3, 5}, dups: []intV{2, 2, 2, 4, 4}}, - {in: []intV{1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, out: []intV{1, 3, 6, 7, 9}, dups: []intV{2, 2, 2, 4, 4, 5, 5, 8, 8}}, + {in: []int{1, 2, 2, 3}, out: []int{1, 3}, dups: []int{2, 2}}, + {in: []int{1, 2, 2, 2, 3}, out: []int{1, 3}, dups: []int{2, 2, 2}}, + {in: []int{1, 2, 2, 2, 3, 3, 4}, out: []int{1, 4}, dups: []int{2, 2, 2, 3, 3}}, + {in: []int{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []int{1, 5}, dups: []int{2, 2, 2, 3, 3, 4, 4}}, + {in: []int{1, 2, 2, 2, 3, 4, 4, 5}, out: []int{1, 3, 5}, dups: []int{2, 2, 2, 4, 4}}, + {in: []int{1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, out: []int{1, 3, 6, 7, 9}, dups: []int{2, 2, 2, 4, 4, 5, 5, 8, 8}}, // duplicates at end - {in: []intV{1, 2, 3, 3}, out: []intV{1, 2}, dups: []intV{3, 3}}, - {in: []intV{1, 2, 3, 3, 3}, out: []intV{1, 2}, dups: []intV{3, 3, 3}}, + {in: []int{1, 2, 3, 3}, out: []int{1, 2}, dups: []int{3, 3}}, + {in: []int{1, 2, 3, 3, 3}, out: []int{1, 2}, dups: []int{3, 3, 3}}, // mixing - {in: []intV{1, 1, 2, 3, 3, 4}, out: []intV{2, 4}, dups: []intV{1, 1, 3, 3}}, - {in: []intV{1, 2, 3, 3, 4, 4}, out: []intV{1, 2}, dups: []intV{3, 3, 4, 4}}, - {in: []intV{1, 1, 2, 3, 4, 
4}, out: []intV{2, 3}, dups: []intV{1, 1, 4, 4}}, - {in: []intV{1, 1, 2, 2, 3, 3}, out: []intV{}, dups: []intV{1, 1, 2, 2, 3, 3}}, - {in: []intV{1, 1, 2, 2, 2, 3, 3}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3}}, - {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4}}, - {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}, out: []intV{}, dups: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}}, - {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5}, out: []intV{3}, dups: []intV{1, 1, 2, 2, 2, 4, 4, 5, 5}}, - {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, out: []intV{3, 6, 7}, dups: []intV{1, 1, 2, 2, 2, 4, 4, 5, 5, 8, 8, 9, 9}}, + {in: []int{1, 1, 2, 3, 3, 4}, out: []int{2, 4}, dups: []int{1, 1, 3, 3}}, + {in: []int{1, 2, 3, 3, 4, 4}, out: []int{1, 2}, dups: []int{3, 3, 4, 4}}, + {in: []int{1, 1, 2, 3, 4, 4}, out: []int{2, 3}, dups: []int{1, 1, 4, 4}}, + {in: []int{1, 1, 2, 2, 3, 3}, out: []int{}, dups: []int{1, 1, 2, 2, 3, 3}}, + {in: []int{1, 1, 2, 2, 2, 3, 3}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3}}, + {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3, 4, 4}}, + {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}, out: []int{}, dups: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5}}, + {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5}, out: []int{3}, dups: []int{1, 1, 2, 2, 2, 4, 4, 5, 5}}, + {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, out: []int{3, 6, 7}, dups: []int{1, 1, 2, 2, 2, 4, 4, 5, 5, 8, 8, 9, 9}}, } for i, c := range cases { @@ -560,16 +557,16 @@ func TestRemoveDuplicates(t *testing.T) { require.True(t, slices.IsSorted(c.out)) require.True(t, slices.IsSorted(c.dups)) require.Equal(t, len(c.dups), len(c.in)-len(c.out)) - tmpIn := make([]intV, len(c.in)) + tmpIn := make([]int, len(c.in)) copy(tmpIn, c.in) - out, dups, dupCnt := removeDuplicates(tmpIn, true) + out, dups, dupCnt := removeDuplicates(tmpIn, valGetter, true) require.EqualValues(t, c.out, out) require.EqualValues(t, c.dups, 
dups) require.Equal(t, dupCnt, len(dups)) - tmpIn = make([]intV, len(c.in)) + tmpIn = make([]int, len(c.in)) copy(tmpIn, c.in) - out, dups, dupCnt = removeDuplicates(tmpIn, false) + out, dups, dupCnt = removeDuplicates(tmpIn, valGetter, false) require.EqualValues(t, c.out, out) require.Empty(t, dups) require.Equal(t, dupCnt, len(c.dups)) @@ -578,50 +575,53 @@ func TestRemoveDuplicates(t *testing.T) { } func TestRemoveDuplicatesMoreThan2(t *testing.T) { + valGetter := func(e *int) []byte { + return []byte{byte(*e)} + } cases := []struct { - in []intV - out []intV - dups []intV + in []int + out []int + dups []int total int }{ // no duplicates - {in: []intV{}, out: []intV{}, dups: []intV{}, total: 0}, - {in: []intV{1}, out: []intV{1}, dups: []intV{}, total: 0}, - {in: []intV{1, 2}, out: []intV{1, 2}, dups: []intV{}, total: 0}, - {in: []intV{1, 2, 3}, out: []intV{1, 2, 3}, dups: []intV{}, total: 0}, - {in: []intV{1, 2, 3, 4, 5}, out: []intV{1, 2, 3, 4, 5}, dups: []intV{}, total: 0}, + {in: []int{}, out: []int{}, dups: []int{}, total: 0}, + {in: []int{1}, out: []int{1}, dups: []int{}, total: 0}, + {in: []int{1, 2}, out: []int{1, 2}, dups: []int{}, total: 0}, + {in: []int{1, 2, 3}, out: []int{1, 2, 3}, dups: []int{}, total: 0}, + {in: []int{1, 2, 3, 4, 5}, out: []int{1, 2, 3, 4, 5}, dups: []int{}, total: 0}, // duplicates at beginning - {in: []intV{1, 1}, out: []intV{1, 1}, dups: []intV{}, total: 2}, - {in: []intV{1, 1, 1}, out: []intV{1, 1}, dups: []intV{1}, total: 3}, - {in: []intV{1, 1, 1, 1}, out: []intV{1, 1}, dups: []intV{1, 1}, total: 4}, - {in: []intV{1, 1, 1, 1, 1}, out: []intV{1, 1}, dups: []intV{1, 1, 1}, total: 5}, - {in: []intV{1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{}, total: 2}, - {in: []intV{1, 1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{1}, total: 3}, - {in: []intV{1, 1, 1, 1, 2, 3}, out: []intV{1, 1, 2, 3}, dups: []intV{1, 1}, total: 4}, + {in: []int{1, 1}, out: []int{1, 1}, dups: []int{}, total: 2}, + {in: []int{1, 1, 1}, out: 
[]int{1, 1}, dups: []int{1}, total: 3}, + {in: []int{1, 1, 1, 1}, out: []int{1, 1}, dups: []int{1, 1}, total: 4}, + {in: []int{1, 1, 1, 1, 1}, out: []int{1, 1}, dups: []int{1, 1, 1}, total: 5}, + {in: []int{1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{}, total: 2}, + {in: []int{1, 1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{1}, total: 3}, + {in: []int{1, 1, 1, 1, 2, 3}, out: []int{1, 1, 2, 3}, dups: []int{1, 1}, total: 4}, // duplicates in middle - {in: []intV{1, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{}, total: 2}, - {in: []intV{1, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2}, total: 3}, - {in: []intV{1, 2, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2, 2}, total: 4}, - {in: []intV{1, 2, 2, 2, 2, 2, 3}, out: []intV{1, 2, 2, 3}, dups: []intV{2, 2, 2}, total: 5}, - {in: []intV{1, 2, 2, 2, 3, 3, 4}, out: []intV{1, 2, 2, 3, 3, 4}, dups: []intV{2}, total: 5}, - {in: []intV{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []intV{1, 2, 2, 3, 3, 4, 4, 5}, dups: []intV{2}, total: 7}, - {in: []intV{1, 2, 2, 2, 3, 4, 4, 5}, out: []intV{1, 2, 2, 3, 4, 4, 5}, dups: []intV{2}, total: 5}, - {in: []intV{1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9}, out: []intV{1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, dups: []intV{2, 5}, total: 10}, + {in: []int{1, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{}, total: 2}, + {in: []int{1, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2}, total: 3}, + {in: []int{1, 2, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2, 2}, total: 4}, + {in: []int{1, 2, 2, 2, 2, 2, 3}, out: []int{1, 2, 2, 3}, dups: []int{2, 2, 2}, total: 5}, + {in: []int{1, 2, 2, 2, 3, 3, 4}, out: []int{1, 2, 2, 3, 3, 4}, dups: []int{2}, total: 5}, + {in: []int{1, 2, 2, 2, 3, 3, 4, 4, 5}, out: []int{1, 2, 2, 3, 3, 4, 4, 5}, dups: []int{2}, total: 7}, + {in: []int{1, 2, 2, 2, 3, 4, 4, 5}, out: []int{1, 2, 2, 3, 4, 4, 5}, dups: []int{2}, total: 5}, + {in: []int{1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9}, out: []int{1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9}, dups: 
[]int{2, 5}, total: 10}, // duplicates at end - {in: []intV{1, 2, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{}, total: 2}, - {in: []intV{1, 2, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3}, total: 3}, - {in: []intV{1, 2, 3, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3, 3}, total: 4}, - {in: []intV{1, 2, 3, 3, 3, 3, 3}, out: []intV{1, 2, 3, 3}, dups: []intV{3, 3, 3}, total: 5}, + {in: []int{1, 2, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{}, total: 2}, + {in: []int{1, 2, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3}, total: 3}, + {in: []int{1, 2, 3, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3, 3}, total: 4}, + {in: []int{1, 2, 3, 3, 3, 3, 3}, out: []int{1, 2, 3, 3}, dups: []int{3, 3, 3}, total: 5}, // mixing - {in: []intV{1, 1, 1, 1, 1, 2, 3, 3, 3, 4}, out: []intV{1, 1, 2, 3, 3, 4}, dups: []intV{1, 1, 1, 3}, total: 8}, - {in: []intV{1, 2, 3, 3, 3, 4, 4, 4}, out: []intV{1, 2, 3, 3, 4, 4}, dups: []intV{3, 4}, total: 6}, - {in: []intV{1, 1, 1, 2, 3, 4, 4, 4}, out: []intV{1, 1, 2, 3, 4, 4}, dups: []intV{1, 4}, total: 6}, - {in: []intV{1, 1, 1, 2, 2, 2, 3, 3, 3}, out: []intV{1, 1, 2, 2, 3, 3}, dups: []intV{1, 2, 3}, total: 9}, - {in: []intV{1, 1, 2, 2, 2, 3, 3}, out: []intV{1, 1, 2, 2, 3, 3}, dups: []intV{2}, total: 7}, - {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 4}, out: []intV{1, 1, 2, 2, 3, 3, 4, 4}, dups: []intV{2, 4}, total: 10}, - {in: []intV{1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5}, out: []intV{1, 1, 2, 2, 3, 3, 4, 4, 5, 5}, dups: []intV{2, 4}, total: 12}, - {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5}, out: []intV{1, 1, 2, 2, 3, 4, 4, 5, 5}, dups: []intV{2, 4, 5}, total: 11}, - {in: []intV{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9, 9}, out: []intV{1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, dups: []intV{2, 5}, total: 14}, + {in: []int{1, 1, 1, 1, 1, 2, 3, 3, 3, 4}, out: []int{1, 1, 2, 3, 3, 4}, dups: []int{1, 1, 1, 3}, total: 8}, + {in: []int{1, 2, 3, 3, 3, 4, 4, 4}, out: []int{1, 2, 3, 3, 4, 4}, dups: []int{3, 4}, total: 6}, + {in: []int{1, 
1, 1, 2, 3, 4, 4, 4}, out: []int{1, 1, 2, 3, 4, 4}, dups: []int{1, 4}, total: 6}, + {in: []int{1, 1, 1, 2, 2, 2, 3, 3, 3}, out: []int{1, 1, 2, 2, 3, 3}, dups: []int{1, 2, 3}, total: 9}, + {in: []int{1, 1, 2, 2, 2, 3, 3}, out: []int{1, 1, 2, 2, 3, 3}, dups: []int{2}, total: 7}, + {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 4}, out: []int{1, 1, 2, 2, 3, 3, 4, 4}, dups: []int{2, 4}, total: 10}, + {in: []int{1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5}, out: []int{1, 1, 2, 2, 3, 3, 4, 4, 5, 5}, dups: []int{2, 4}, total: 12}, + {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5}, out: []int{1, 1, 2, 2, 3, 4, 4, 5, 5}, dups: []int{2, 4, 5}, total: 11}, + {in: []int{1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6, 7, 8, 8, 9, 9}, out: []int{1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9}, dups: []int{2, 5}, total: 14}, } for i, c := range cases { @@ -630,9 +630,9 @@ func TestRemoveDuplicatesMoreThan2(t *testing.T) { require.True(t, slices.IsSorted(c.out)) require.True(t, slices.IsSorted(c.dups)) require.Equal(t, len(c.dups), len(c.in)-len(c.out)) - tmpIn := make([]intV, len(c.in)) + tmpIn := make([]int, len(c.in)) copy(tmpIn, c.in) - out, dups, totalDup := removeDuplicatesMoreThanTwo(tmpIn) + out, dups, totalDup := removeDuplicatesMoreThanTwo(tmpIn, valGetter) require.EqualValues(t, c.out, out) require.EqualValues(t, c.dups, dups) require.Equal(t, c.total, totalDup) diff --git a/pkg/lightning/backend/external/writer.go b/pkg/lightning/backend/external/writer.go index a537a4a428b62..dc84855bfa924 100644 --- a/pkg/lightning/backend/external/writer.go +++ b/pkg/lightning/backend/external/writer.go @@ -422,15 +422,6 @@ func GetMaxOverlappingTotal(stats []MultipleFilesStat) int64 { return GetMaxOverlapping(points) } -type locationWithBuf struct { - loc membuf.SliceLocation - key []byte -} - -func (l *locationWithBuf) GetByte() []byte { - return l.key -} - // Writer is used to write data into external storage. 
type Writer struct { store storage.ExternalStorage @@ -444,9 +435,9 @@ type Writer struct { memSizeLimit uint64 - kvBuffer *membuf.Buffer - locationWithBuf []*locationWithBuf - kvSize int64 + kvBuffer *membuf.Buffer + kvLocations []membuf.SliceLocation + kvSize int64 onClose OnCloseFunc onDup engineapi.OnDuplicateKey @@ -495,10 +486,7 @@ func (w *Writer) WriteRow(ctx context.Context, key, val []byte, handle tidbkv.Ha copy(dataBuf[2*lengthBytes:], key) copy(dataBuf[2*lengthBytes+keyLen:], val) - w.locationWithBuf = append(w.locationWithBuf, &locationWithBuf{ - loc: loc, - key: dataBuf[2*lengthBytes : 2*lengthBytes+keyLen], - }) + w.kvLocations = append(w.kvLocations, loc) // TODO: maybe we can unify the size calculation during write to store. w.kvSize += int64(keyLen + len(val)) w.batchSize += uint64(length) @@ -526,11 +514,11 @@ func (w *Writer) Close(ctx context.Context) error { logutil.Logger(ctx).Info("close writer", zap.String("writerID", w.writerID), - zap.Int("kv-cnt-cap", cap(w.locationWithBuf)), + zap.Int("kv-cnt-cap", cap(w.kvLocations)), zap.String("minKey", hex.EncodeToString(w.minKey)), zap.String("maxKey", hex.EncodeToString(w.maxKey))) - w.locationWithBuf = nil + w.kvLocations = nil w.onClose(&WriterSummary{ WriterID: w.writerID, GroupOffset: w.groupOffset, @@ -558,7 +546,7 @@ func (w *Writer) recordMinMax(newMin, newMax tidbkv.Key, size uint64) { const flushKVsRetryTimes = 3 func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { - if len(w.locationWithBuf) == 0 { + if len(w.kvLocations) == 0 { return nil } @@ -569,13 +557,13 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { sortStart := time.Now() var ( dupFound bool - dupLoc *locationWithBuf + dupLoc *membuf.SliceLocation ) - slices.SortFunc(w.locationWithBuf, func(i, j *locationWithBuf) int { - res := bytes.Compare(i.key, j.key) + slices.SortFunc(w.kvLocations, func(i, j membuf.SliceLocation) int { + res := bytes.Compare(w.getKeyByLoc(&i), 
w.getKeyByLoc(&j)) if res == 0 && !dupFound { dupFound = true - dupLoc = i + dupLoc = &i } return res }) @@ -583,9 +571,9 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { metrics.GlobalSortWriteToCloudStorageDuration.WithLabelValues("sort").Observe(sortDuration.Seconds()) metrics.GlobalSortWriteToCloudStorageRate.WithLabelValues("sort").Observe(float64(w.batchSize) / 1024.0 / 1024.0 / sortDuration.Seconds()) - batchKVCnt := len(w.locationWithBuf) + batchKVCnt := len(w.kvLocations) var ( - dupLocs []*locationWithBuf + dupLocs []membuf.SliceLocation dupCnt int ) if dupFound { @@ -594,14 +582,14 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { case engineapi.OnDuplicateKeyRecord: // we don't have a global view, so need to keep duplicates with duplicate // count <= 2, so later we can find them. - w.locationWithBuf, dupLocs, dupCnt = removeDuplicatesMoreThanTwo(w.locationWithBuf) + w.kvLocations, dupLocs, dupCnt = removeDuplicatesMoreThanTwo(w.kvLocations, w.getKeyByLoc) w.kvSize = w.reCalculateKVSize() case engineapi.OnDuplicateKeyRemove: - w.locationWithBuf, _, dupCnt = removeDuplicates(w.locationWithBuf, false) + w.kvLocations, _, dupCnt = removeDuplicates(w.kvLocations, w.getKeyByLoc, false) w.kvSize = w.reCalculateKVSize() case engineapi.OnDuplicateKeyError: - dupKey := slices.Clone(w.getKeyByLoc(&dupLoc.loc)) - dupValue := slices.Clone(w.getValueByLoc(&dupLoc.loc)) + dupKey := slices.Clone(w.getKeyByLoc(dupLoc)) + dupValue := slices.Clone(w.getValueByLoc(dupLoc)) return common.ErrFoundDuplicateKeys.FastGenByArgs(dupKey, dupValue) } } @@ -610,7 +598,7 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { var dataFile, statFile, dupFile string // due to current semantic of OnDuplicateKeyRecord, if len(w.kvLocations) = 0, // len(dupLocs) is also 0 - if len(w.locationWithBuf) > 0 { + if len(w.kvLocations) > 0 { for i := range flushKVsRetryTimes { dataFile, statFile, dupFile, err = 
w.flushSortedKVs(ctx, dupLocs) if err == nil || ctx.Err() != nil { @@ -642,10 +630,10 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { metrics.GlobalSortWriteToCloudStorageRate.WithLabelValues("sort_and_write").Observe(float64(w.batchSize) / 1024.0 / 1024.0 / totalDuration.Seconds()) // maintain 500-batch statistics - if len(w.locationWithBuf) > 0 { - w.totalCnt += uint64(len(w.locationWithBuf)) + if len(w.kvLocations) > 0 { + w.totalCnt += uint64(len(w.kvLocations)) - minKey, maxKey := w.locationWithBuf[0].key, w.locationWithBuf[len(w.locationWithBuf)-1].key + minKey, maxKey := w.getKeyByLoc(&w.kvLocations[0]), w.getKeyByLoc(&w.kvLocations[len(w.kvLocations)-1]) w.recordMinMax(minKey, maxKey, uint64(w.kvSize)) w.addNewKVFile2MultiFileStats(dataFile, statFile, minKey, maxKey) @@ -662,7 +650,7 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) { }) } - w.locationWithBuf = w.locationWithBuf[:0] + w.kvLocations = w.kvLocations[:0] w.kvSize = 0 w.kvBuffer.Reset() w.batchSize = 0 @@ -692,7 +680,7 @@ func (w *Writer) addNewKVFile2MultiFileStats(dataFile, statFile string, minKey, w.fileMaxKeys = append(w.fileMaxKeys, tidbkv.Key(maxKey).Clone()) } -func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []*locationWithBuf) (string, string, string, error) { +func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []membuf.SliceLocation) (string, string, string, error) { logger := logutil.Logger(ctx).With( zap.String("writer-id", w.writerID), zap.Int("sequence-number", w.currentSeq), @@ -715,8 +703,8 @@ func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []*locationWithBuf) w.rc.reset() kvStore := NewKeyValueStore(ctx, dataWriter, w.rc) - for _, pair := range w.locationWithBuf { - err = kvStore.addEncodedData(w.kvBuffer.GetSlice(&pair.loc)) + for _, pair := range w.kvLocations { + err = kvStore.addEncodedData(w.kvBuffer.GetSlice(&pair)) if err != nil { return "", "", "", err } @@ -761,7 +749,7 @@ 
func (w *Writer) flushSortedKVs(ctx context.Context, dupLocs []*locationWithBuf) return dataFile, statFile, dupPath, nil } -func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []*locationWithBuf) (string, error) { +func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []membuf.SliceLocation) (string, error) { dupPath, dupWriter, err := w.createDupWriter(ctx) if err != nil { return "", err @@ -774,8 +762,8 @@ func (w *Writer) writeDupKVs(ctx context.Context, kvLocs []*locationWithBuf) (st } }() dupStore := NewKeyValueStore(ctx, dupWriter, nil) - for _, l := range kvLocs { - err = dupStore.addEncodedData(w.kvBuffer.GetSlice(&l.loc)) + for _, pair := range kvLocs { + err = dupStore.addEncodedData(w.kvBuffer.GetSlice(&pair)) if err != nil { return "", err } @@ -803,8 +791,8 @@ func (w *Writer) getValueByLoc(loc *membuf.SliceLocation) []byte { func (w *Writer) reCalculateKVSize() int64 { s := int64(0) - for _, loc := range w.locationWithBuf { - s += int64(loc.loc.Length) - 2*lengthBytes + for _, loc := range w.kvLocations { + s += int64(loc.Length) - 2*lengthBytes } return s } From c58991014962a096fd5598d44d577dedfbe3397d Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Sat, 6 Sep 2025 01:19:51 -0400 Subject: [PATCH 92/93] fix after merge Signed-off-by: Ruihao Chen --- pkg/lightning/mydump/loader_test.go | 4 ---- pkg/lightning/mydump/parquet_parser.go | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go index 4a2157f73fd86..73006fdf18f23 100644 --- a/pkg/lightning/mydump/loader_test.go +++ b/pkg/lightning/mydump/loader_test.go @@ -1194,10 +1194,6 @@ func testSampleParquetDataSize(t *testing.T, count int) { } md.WriteParquetFile(s.sourceDir, fileName, pc, count) - fileName := "test_1.t1.parquet" - err = store.WriteFile(ctx, fileName, bf.Bytes()) - require.NoError(t, err) - rowCount, rowSize, _, err := md.SampleStatisticsFromParquet(ctx, md.SourceFileMeta{ Path: fileName, 
}, store) diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go index a8fe0b47dde6e..f82bfadb6742a 100644 --- a/pkg/lightning/mydump/parquet_parser.go +++ b/pkg/lightning/mydump/parquet_parser.go @@ -341,6 +341,10 @@ func (pp *ParquetParser) readSingleRows(row []types.Datum) error { pp.resetReader() } pp.curRowGroup++ + if pp.curRowGroup >= pp.totalRowGroup { + return io.EOF + } + for c := range len(pp.dumpers) { rowGroup := pp.readers[c].RowGroup(pp.curRowGroup) colReader, err := rowGroup.Column(c) From dddd4afcf839112d44caf7d46556c5634e0a484a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 8 Sep 2025 02:37:29 -0400 Subject: [PATCH 93/93] update code Signed-off-by: Ruihao Chen --- .../importinto/encode_and_sort_operator.go | 12 +- .../encode_and_sort_operator_test.go | 2 +- pkg/disttask/importinto/subtask_executor.go | 10 +- pkg/disttask/importinto/task_executor.go | 9 +- pkg/disttask/importinto/wrapper.go | 2 - pkg/executor/importer/import.go | 16 +- pkg/executor/importer/table_import.go | 2 - pkg/lightning/mydump/BUILD.bazel | 5 +- pkg/lightning/mydump/allocator.go | 81 +++-- pkg/lightning/mydump/allocator_test.go | 2 +- pkg/lightning/mydump/append_only_allocator.go | 11 +- pkg/lightning/mydump/list_allocator.go | 310 ------------------ pkg/lightning/mydump/loader.go | 3 +- pkg/lightning/mydump/parquet_parser.go | 4 +- 14 files changed, 81 insertions(+), 388 deletions(-) delete mode 100644 pkg/lightning/mydump/list_allocator.go diff --git a/pkg/disttask/importinto/encode_and_sort_operator.go b/pkg/disttask/importinto/encode_and_sort_operator.go index 8caa9960fea7e..7c52c45fce315 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator.go +++ b/pkg/disttask/importinto/encode_and_sort_operator.go @@ -232,20 +232,16 @@ func subtaskPrefix(taskID, subtaskID int64) string { return path.Join(strconv.Itoa(int(taskID)), strconv.Itoa(int(subtaskID))) } -func getWriterMemorySizeLimit(resource *proto.StepResource, plan 
*importer.Plan) ( +func getWriterMemorySizeLimit(resource *proto.StepResource, plan *importer.Plan, encodeStep bool) ( dataKVMemSizePerCon, perIndexKVMemSizePerCon uint64) { indexKVGroupCnt := getNumOfIndexGenKV(plan.DesiredTableInfo) - threadCnt := plan.ThreadCnt - if plan.EncodeThreadCnt > 0 { - threadCnt = plan.EncodeThreadCnt - } - // We use a portion of the total available memory for data writer, which is depended // on the data format, and the other half for encoding and other stuffs, it's an // experience value, might not optimal. - memPerCon := resource.Mem.Capacity() / int64(threadCnt) - memForWriter := mydump.GetMemoryForWriter(plan.Format, int(memPerCon)) + memForWriter := mydump.GetMemoryForWriter( + encodeStep, plan.ParquetFileMemoryUsage, + plan.ThreadCnt, int(resource.Mem.Capacity())) // Then we divide those memory into indexKVGroupCnt + 3 shares, data KV writer // takes 3 shares, and each index KV writer takes 1 share. diff --git a/pkg/disttask/importinto/encode_and_sort_operator_test.go b/pkg/disttask/importinto/encode_and_sort_operator_test.go index 3c24800d1d306..75e14f36d9fb2 100644 --- a/pkg/disttask/importinto/encode_and_sort_operator_test.go +++ b/pkg/disttask/importinto/encode_and_sort_operator_test.go @@ -233,7 +233,7 @@ func TestGetWriterMemorySizeLimit(t *testing.T) { }, &importer.Plan{ DesiredTableInfo: info, ThreadCnt: 1, - }) + }, false) require.Equal(t, c.dataKVMemSizePerCon, dataKVMemSizePerCon, c.createSQL) if c.numOfIndexGenKV > 0 { require.Equal(t, c.perIndexKVMemSizePerCon, perIndexKVMemSizePerCon, c.createSQL) diff --git a/pkg/disttask/importinto/subtask_executor.go b/pkg/disttask/importinto/subtask_executor.go index 332681ed7f56b..fa51b0b148084 100644 --- a/pkg/disttask/importinto/subtask_executor.go +++ b/pkg/disttask/importinto/subtask_executor.go @@ -72,9 +72,15 @@ func (e *importMinimalTaskExecutor) Run( failpoint.Return(errors.New("occur an error when sort chunk")) }) failpoint.InjectCall("syncBeforeSortChunk") - 
chunkCheckpoint := toChunkCheckpoint(e.mTtask.Chunk) - chunkCheckpoint.FileMeta.ParquetMeta.MemoryPool = pool sharedVars := e.mTtask.SharedVars + + chunkCheckpoint := toChunkCheckpoint(e.mTtask.Chunk) + chunkCheckpoint.FileMeta.ParquetMeta = mydump.ParquetFileMeta{ + MemoryPool: pool, + MemoryUsage: sharedVars.TableImporter.ParquetFileMemoryUsage, + Loc: sharedVars.TableImporter.Location, + } + checksum := verify.NewKVGroupChecksumWithKeyspace(sharedVars.TableImporter.GetKeySpace()) if sharedVars.TableImporter.IsLocalSort() { if err := importer.ProcessChunk( diff --git a/pkg/disttask/importinto/task_executor.go b/pkg/disttask/importinto/task_executor.go index 83251e1fdd227..ee0360d563452 100644 --- a/pkg/disttask/importinto/task_executor.go +++ b/pkg/disttask/importinto/task_executor.go @@ -125,10 +125,6 @@ func (s *importStepExecutor) Init(ctx context.Context) (err error) { } }() - if s.taskMeta.Plan.Format == importer.DataFormatParquet && s.tableImporter.EncodeThreadCnt > 0 { - s.tableImporter.Plan.ThreadCnt = s.tableImporter.EncodeThreadCnt - } - if kerneltype.IsClassic() { taskManager, err = dxfstorage.GetTaskManager() if err != nil { @@ -160,7 +156,8 @@ func (s *importStepExecutor) Init(ctx context.Context) (err error) { s.tableImporter.CheckDiskQuota(s.importCtx) }() } - s.dataKVMemSizePerCon, s.perIndexKVMemSizePerCon = getWriterMemorySizeLimit(s.GetResource(), s.tableImporter.Plan) + s.dataKVMemSizePerCon, s.perIndexKVMemSizePerCon = getWriterMemorySizeLimit( + s.GetResource(), s.tableImporter.Plan, true) s.dataBlockSize = external.GetAdjustedBlockSize(s.dataKVMemSizePerCon, tidbconfig.MaxTxnEntrySizeLimit) s.indexBlockSize = external.GetAdjustedBlockSize(s.perIndexKVMemSizePerCon, external.DefaultBlockSize) s.logger.Info("KV writer memory buf info", @@ -375,7 +372,7 @@ func (m *mergeSortStepExecutor) Init(ctx context.Context) error { return err } m.sortStore = store - dataKVMemSizePerCon, perIndexKVMemSizePerCon := getWriterMemorySizeLimit(m.GetResource(), 
&m.taskMeta.Plan) + dataKVMemSizePerCon, perIndexKVMemSizePerCon := getWriterMemorySizeLimit(m.GetResource(), &m.taskMeta.Plan, false) m.dataKVPartSize = max(external.MinUploadPartSize, int64(dataKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) m.indexKVPartSize = max(external.MinUploadPartSize, int64(perIndexKVMemSizePerCon*uint64(external.MaxMergingFilesPerThread)/10000)) diff --git a/pkg/disttask/importinto/wrapper.go b/pkg/disttask/importinto/wrapper.go index 10144e39d1e00..af7d161cf56ee 100644 --- a/pkg/disttask/importinto/wrapper.go +++ b/pkg/disttask/importinto/wrapper.go @@ -31,7 +31,6 @@ func toChunkCheckpoint(chunk importer.Chunk) checkpoints.ChunkCheckpoint { Type: chunk.Type, Compression: chunk.Compression, FileSize: chunk.FileSize, - ParquetMeta: chunk.ParquetMeta, }, Chunk: mydump.Chunk{ PrevRowIDMax: chunk.PrevRowIDMax, @@ -54,6 +53,5 @@ func toChunk(chunkCheckpoint checkpoints.ChunkCheckpoint) importer.Chunk { Type: chunkCheckpoint.FileMeta.Type, Compression: chunkCheckpoint.FileMeta.Compression, Timestamp: chunkCheckpoint.Timestamp, - ParquetMeta: chunkCheckpoint.FileMeta.ParquetMeta, } } diff --git a/pkg/executor/importer/import.go b/pkg/executor/importer/import.go index baa62ba215502..fa72ea2f21528 100644 --- a/pkg/executor/importer/import.go +++ b/pkg/executor/importer/import.go @@ -254,7 +254,10 @@ type Plan struct { // in parquet is always adjusted to UTC, see // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp Location *time.Location - SQLMode mysql.SQLMode + // ParquetFileMemoryUsage is the estimated memory usage of each parquet parser. + ParquetFileMemoryUsage int + + SQLMode mysql.SQLMode // Charset is the charset of the data file when file is CSV or TSV. // it might be nil when using LOAD DATA and no charset is specified. // for IMPORT INTO, it is always non-nil and default to be defaultCharacterSet. 
@@ -273,7 +276,6 @@ type Plan struct { DiskQuota config.ByteSize Checksum config.PostOpLevel ThreadCnt int - EncodeThreadCnt int MaxNodeCnt int MaxWriteSpeed config.ByteSize SplitFile bool @@ -1319,19 +1321,11 @@ func (e *LoadDataController) InitDataFiles(ctx context.Context) error { // Fill memory usage info if sourceType == mydump.SourceTypeParquet && len(dataFiles) > 0 { - // We may not be able to open ThreadCnt files concurrently due to memory usage _, _, memoryUsage, err := mydump.SampleStatisticsFromParquet(ctx, *dataFiles[0], e.dataStore) - e.Plan.EncodeThreadCnt = mydump.AdjustEncodeThreadCnt(memoryUsage, e.Plan.ThreadCnt) - + e.Plan.ParquetFileMemoryUsage = memoryUsage if err != nil { return errors.Trace(err) } - for _, dataFile := range dataFiles { - // To reduce the memory usage, we only use streaming mode to read file. - dataFile.ParquetMeta = mydump.ParquetFileMeta{ - MemoryUsage: memoryUsage, - } - } } failpoint.Label("afterReadFiles") diff --git a/pkg/executor/importer/table_import.go b/pkg/executor/importer/table_import.go index bdf69cf533fea..04d24b0c97af1 100644 --- a/pkg/executor/importer/table_import.go +++ b/pkg/executor/importer/table_import.go @@ -306,8 +306,6 @@ func (ti *TableImporter) getParser(ctx context.Context, chunk *checkpoints.Chunk Remote: &chunk.FileMeta, } - info.Remote.ParquetMeta.Loc = ti.Location - parser, err := ti.LoadDataController.GetParser(ctx, info) if err != nil { return nil, err diff --git a/pkg/lightning/mydump/BUILD.bazel b/pkg/lightning/mydump/BUILD.bazel index 8e915cd3777eb..edfa5c75df55e 100644 --- a/pkg/lightning/mydump/BUILD.bazel +++ b/pkg/lightning/mydump/BUILD.bazel @@ -8,7 +8,6 @@ go_library( "bytes.go", "charset_convertor.go", "csv_parser.go", - "list_allocator.go", "loader.go", "parquet_parser.go", "parquet_type_converter.go", @@ -29,6 +28,7 @@ go_library( "//pkg/lightning/common", "//pkg/lightning/config", "//pkg/lightning/log", + "//pkg/lightning/membuf", "//pkg/lightning/metric", 
"//pkg/lightning/worker", "//pkg/parser", @@ -38,11 +38,8 @@ go_library( "//pkg/parser/mysql", "//pkg/types", "//pkg/util", - "//pkg/util/cpu", "//pkg/util/filter", - "//pkg/util/intest", "//pkg/util/logutil", - "//pkg/util/memory", "//pkg/util/regexpr-router", "//pkg/util/set", "//pkg/util/sqlescape", diff --git a/pkg/lightning/mydump/allocator.go b/pkg/lightning/mydump/allocator.go index b662dd974fbe9..70a41e7ee7994 100644 --- a/pkg/lightning/mydump/allocator.go +++ b/pkg/lightning/mydump/allocator.go @@ -17,50 +17,38 @@ package mydump import ( "unsafe" - "github.com/pingcap/tidb/pkg/util/cpu" - tidbmemory "github.com/pingcap/tidb/pkg/util/memory" + "github.com/apache/arrow-go/v18/arrow/memory" + "github.com/pingcap/tidb/pkg/lightning/log" + "github.com/pingcap/tidb/pkg/lightning/membuf" ) var ( // arenaSize is the size of each arena arenaSize = 64 << 20 - // parserMemoryPercent defines the percentage of memory used for parser - parserMemoryPercent = 0.3 - - // parquetWriterPercent defines the percentage of memory used for parquet writer - parquetWriterPercent = 0.4 - - // otherWriterPercent defines the percentage of memory used for csv writer - otherWriterPercent = 0.5 + // maxParquetMemoryPercent defines the maximum percentage of memory used for parquet parser + // Because less memory for writer can make more small files, which may affect performance + // merge and ingest steps, so we set a limit here. + maxParquetMemoryPercent = 40 ) -// GetMemoryForWriter gets the memory for writer according to the file type. -func GetMemoryForWriter(tp string, memPerCon int) int { - switch tp { - case "parquet": - return int(float64(memPerCon) * parquetWriterPercent) - default: - return int(float64(memPerCon) * otherWriterPercent) - } -} - -// AdjustEncodeThreadCnt adjusts the concurrency in encode&sort step for parquet IMPORT INTO. 
-func AdjustEncodeThreadCnt(memoryPerFile, threadCnt int) int { - totalCPU := cpu.GetCPUCount() - totalMem, err := tidbmemory.MemTotal() - if err != nil { - return threadCnt - } - - if totalCPU <= 0 || totalMem <= 0 { - return threadCnt +// GetMemoryForWriter gets the memory for writer +func GetMemoryForWriter(encodeStep bool, parquetMemUsage, threadCnt, totalMem int) int64 { + memPerCon := totalMem / threadCnt + + writerPercent := 50 + if encodeStep { + upperLimit := totalMem * maxParquetMemoryPercent / 100 + if parquetMemUsage >= upperLimit { + log.L().Warn("parquet parser memory usage is too high, may cause OOM") + } + actualUsage := min(parquetMemUsage*threadCnt, upperLimit) + parserPercent := (actualUsage*100/totalMem + 9) / 10 * 10 + writerPercent = (100 - parserPercent) / 2 } - // Use half of memory per conn for parquet parser - memForImport := int(float64(int(totalMem)/totalCPU)*parserMemoryPercent) * threadCnt - optimalThreads := memForImport / memoryPerFile - return max(1, min(optimalThreads, threadCnt)) + // Use half of the remaining memory for writer + return int64(memPerCon * writerPercent / 100) } // Pool manages a pool of reusable byte buffers to reduce memory allocation overhead. @@ -68,17 +56,33 @@ func AdjustEncodeThreadCnt(memoryPerFile, threadCnt int) int { type Pool struct { blockSize int blockCache chan []byte + limit int + // As we may not be able to open all files concurrently due to memory usage, + // we use a memory limiter to limit the memory usage of parquet parser. + limiter *membuf.Limiter } // GetPool gets a pool with the given capacity. func GetPool(capacity int) *Pool { - mem := int(float64(capacity) * parserMemoryPercent) + limit := capacity * maxParquetMemoryPercent / 100 return &Pool{ blockSize: arenaSize, - blockCache: make(chan []byte, (mem+arenaSize-1)/arenaSize), + blockCache: make(chan []byte, (limit+arenaSize-1)/arenaSize), + limit: limit, + limiter: membuf.NewLimiter(limit), } } +// Acquire acquires memory from the pool. 
+func (p *Pool) Acquire(quota int) { + p.limiter.Acquire(quota) +} + +// Release releases memory to the pool. +func (p *Pool) Release(quota int) { + p.limiter.Release(quota) +} + // Get retrieves a buffer from the pool or allocates a new one if the pool is empty. func (p *Pool) Get() []byte { select { @@ -116,3 +120,8 @@ type arena interface { free([]byte) reset() } + +type AllocatorWithClose interface { + memory.Allocator + Close() +} diff --git a/pkg/lightning/mydump/allocator_test.go b/pkg/lightning/mydump/allocator_test.go index 83d0fb25edcfa..370f97832feb0 100644 --- a/pkg/lightning/mydump/allocator_test.go +++ b/pkg/lightning/mydump/allocator_test.go @@ -27,7 +27,7 @@ func TestSimpleAllocator(t *testing.T) { arenaSize = 16 << 20 pool := GetPool(16 << 23) - a := NewAppendOnlyAllocator(pool) + a := NewAppendOnlyAllocator(pool, 0) var ( lk sync.Mutex diff --git a/pkg/lightning/mydump/append_only_allocator.go b/pkg/lightning/mydump/append_only_allocator.go index f014ccfbc0c6c..5e2106aa28033 100644 --- a/pkg/lightning/mydump/append_only_allocator.go +++ b/pkg/lightning/mydump/append_only_allocator.go @@ -57,15 +57,21 @@ type appendOnlyAllocator struct { slicesMutex sync.RWMutex mapper sync.Map + memUsage int + nextAllocIdx atomic.Int32 externalMemoryCurrent atomic.Int64 externalMemoryMax atomic.Int64 } -func NewAppendOnlyAllocator(pool *Pool) *appendOnlyAllocator { +// NewAppendOnlyAllocator creates a new appendOnlyAllocator with the given memory pool +func NewAppendOnlyAllocator(pool *Pool, memUsage int) *appendOnlyAllocator { + memUsage = min(memUsage, pool.limit) + pool.Acquire(memUsage) alloc := &appendOnlyAllocator{ - pool: pool, + pool: pool, + memUsage: memUsage, } for range 2 { alloc.slices = append(alloc.slices, &appendOnlySlice{buf: pool.Get()}) @@ -153,6 +159,7 @@ func (a *appendOnlyAllocator) Close() { for _, s := range a.slices { a.pool.Put(s.buf) } + a.pool.Release(a.memUsage) } func (a *appendOnlyAllocator) check() { diff --git 
a/pkg/lightning/mydump/list_allocator.go b/pkg/lightning/mydump/list_allocator.go deleted file mode 100644 index 5b2ccdce3e4ba..0000000000000 --- a/pkg/lightning/mydump/list_allocator.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mydump - -import ( - "math" - "sync" - - "github.com/apache/arrow-go/v18/arrow/memory" - "github.com/pingcap/tidb/pkg/util/intest" -) - -const ( - // metaSize is the size of metadata for each allocated block (64 bytes for alignment) - metaSize = 64 - - // invalid represents an invalid offset/pointer in the linked list - invalid = math.MaxInt32 - - // alignSize defines the alignment boundary for memory allocation. - alignSize = 1 << 10 -) - -func roundUp(n, sz int) int { - return (n + sz - 1) / sz * sz -} - -func storeInt(value int, buf []byte) { - buf[0] = byte(value >> 24) - buf[1] = byte(value >> 16) - buf[2] = byte(value >> 8) - buf[3] = byte(value) -} - -func readInt(buf []byte) int { - return int(buf[0])<<24 | int(buf[1])<<16 | int(buf[2])<<8 | int(buf[3]) -} - -/* -simpleAllocator is a memory allocator that manages allocated memory using a linked list structure. -It provides basic memory allocation and deallocation with automatic merging of adjacent free blocks -to reduce fragmentation. 
- -While this allocator has relatively low allocation efficiency due to its linear search algorithm, -it is sufficient for parquet reader scenarios where memory allocation is not the primary bottleneck. - -Memory Layout: -Each block has the following structure: -- Size (4 bytes): Size of the block including metadata -- Previous offset (4 bytes): Offset to the previous block in the free list -- Next offset (4 bytes): Offset to the next block in the free list -- Reserved (52 bytes): Reserved space for alignment -- Data: The actual allocated data follows the metadata - -The allocator maintains a linked list of free blocks: - - ┌───────────────────────────┐ - │ ▼ - ┌─────────────────────────────────────────────────────────────────┐ - │ │ s │ p │ n │ xxxx │ │ s │ p │ n │ xxxx │ │ - └─────────────────────────────────────────────────────────────────┘ - ▲ │ - └───────────────────────────────────┘ - -Where: -- s = size of block -- p = previous block offset -- n = next block offset -- xxxx = reserved space -*/ -type simpleAllocator struct { - buf []byte - base int - - // Number of blocks and bytes allocated - blocksAlloc int - bytesAloc int -} - -func getSimpleAllocator(buf []byte) arena { - a := &simpleAllocator{ - buf: buf, - base: int(addressOf(buf)), - } - a.reset() - return a -} - -func (sa *simpleAllocator) getOffset(buf []byte) int { - return int(addressOf(buf)) - sa.base - metaSize -} - -func (sa *simpleAllocator) setBlk(offset, prev, next, blkSize int) { - if blkSize >= 0 { - storeInt(blkSize, sa.buf[offset:offset+4]) - } - if prev >= 0 { - storeInt(prev, sa.buf[offset+4:offset+8]) - } - if next >= 0 { - storeInt(next, sa.buf[offset+8:offset+12]) - } -} - -func (sa *simpleAllocator) getBlk(offset int) (prev, next, blkSize int) { - blkSize = readInt(sa.buf[offset : offset+4]) - prev = readInt(sa.buf[offset+4 : offset+8]) - next = readInt(sa.buf[offset+8 : offset+12]) - return -} - -func (sa *simpleAllocator) insertFree(free int) { - _, _, freeSize := sa.getBlk(free) 
- - for offset := 0; offset != invalid; { - if free > offset { - _, next, _ := sa.getBlk(offset) - sa.setBlk(offset, -1, free, -1) - sa.setBlk(free, offset, next, -1) - sa.setBlk(next, free, -1, -1) - sa.bytesAloc -= freeSize - return - } - } - panic("Error insertFree") -} - -// merge coalesces adjacent free blocks into larger blocks to reduce fragmentation. -func (sa *simpleAllocator) merge() { - for offset := 0; offset != invalid; { - _, next, blkSize := sa.getBlk(offset) - if offset+blkSize == next { - _, nextnext, nextBlkSize := sa.getBlk(next) - sa.setBlk(offset, -1, nextnext, blkSize+nextBlkSize) - sa.setBlk(nextnext, offset, -1, -1) - } else { - offset = next - } - } -} - -func (sa *simpleAllocator) allocate(size int) []byte { - sa.merge() - - allocSize := roundUp(size+metaSize, alignSize) - - bestOffset := -1 - minRemain := math.MaxInt32 - - for offset := 0; offset != invalid; { - _, next, blkSize := sa.getBlk(offset) - if offset+blkSize >= len(sa.buf) { - panic("Error blk size") - } - if blkSize >= allocSize && blkSize-allocSize < minRemain { - bestOffset = offset - minRemain = blkSize - allocSize - if minRemain == 0 { - break - } - } - offset = next - } - - if bestOffset == -1 { - return nil - } - - if minRemain == 0 { - prev, next, _ := sa.getBlk(bestOffset) - sa.setBlk(prev, -1, next, -1) - sa.setBlk(next, prev, -1, -1) - } else { - sa.setBlk(bestOffset, -1, -1, minRemain) - } - - sa.blocksAlloc++ - sa.bytesAloc += allocSize - bufStart := bestOffset + minRemain - sa.setBlk(bufStart, -1, -1, allocSize) - sa.sanityCheck() - return sa.buf[bufStart+metaSize : bufStart+metaSize+size] -} - -func (sa *simpleAllocator) free(buf []byte) { - offset := sa.getOffset(buf) - if offset < 0 || offset >= len(sa.buf) { - return - } - - sa.blocksAlloc-- - if sa.blocksAlloc == 0 { - sa.reset() - return - } - - sa.insertFree(offset) - sa.sanityCheck() -} - -func (sa *simpleAllocator) sanityCheck() { - if !intest.InTest { - return - } - - mem := sa.bytesAloc - for offset := 
0; offset != invalid; { - _, next, blkSize := sa.getBlk(offset) - mem += blkSize - offset = next - } - if mem != (len(sa.buf) - 3*alignSize) { - panic("sanity check failed: memory accounting mismatch") - } -} - -func (sa *simpleAllocator) reset() { - sa.blocksAlloc = 0 - sa.bytesAloc = 0 - - total := len(sa.buf) - sa.setBlk(0, invalid, alignSize, 0) - sa.setBlk(alignSize, 0, total-alignSize, total-alignSize*3) - sa.setBlk(total-alignSize, alignSize, invalid, 0) -} - -// listAllocator implements memory.Allocator interface using multiple arenas. -type listAllocator struct { - mu sync.RWMutex - - arenas []arena - mbufs [][]byte - pool *Pool - - allocatedBuf map[uintptr]int -} - -func (alloc *listAllocator) Allocate(size int) []byte { - if size >= arenaSize { - return make([]byte, size) - } - - alloc.mu.Lock() - defer alloc.mu.Unlock() - - for i, a := range alloc.arenas { - if buf := a.allocate(size); buf != nil { - alloc.allocatedBuf[addressOf(buf)] = i - return buf - } - } - - mbuf := alloc.pool.Get() - alloc.mbufs = append(alloc.mbufs, mbuf) - - na := getSimpleAllocator(mbuf) - alloc.arenas = append(alloc.arenas, na) - - buf := na.allocate(size) - arenaIndex := len(alloc.arenas) - alloc.allocatedBuf[addressOf(buf)] = arenaIndex - - return buf -} - -func (alloc *listAllocator) Free(buf []byte) { - addr := addressOf(buf) - alloc.mu.Lock() - defer alloc.mu.Unlock() - - if arenaID, ok := alloc.allocatedBuf[addr]; ok { - alloc.arenas[arenaID].free(buf) - delete(alloc.allocatedBuf, addr) - } -} - -func (alloc *listAllocator) Reallocate(size int, buf []byte) []byte { - alloc.Free(buf) - return alloc.Allocate(size) -} - -func (alloc *listAllocator) Close() { - for _, mbuf := range alloc.mbufs { - alloc.pool.Put(mbuf) - } -} - -func (alloc *listAllocator) Allocated() int { - return arenaSize * len(alloc.arenas) -} - -// NewAllocator creates a new default allocator with the given pool. 
-func NewAllocator(pool *Pool) memory.Allocator {
-	return &listAllocator{
-		pool:         pool,
-		allocatedBuf: make(map[uintptr]int, 32),
-	}
-}
diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go
index 3e6d570edb22a..bc0c75bd32781 100644
--- a/pkg/lightning/mydump/loader.go
+++ b/pkg/lightning/mydump/loader.go
@@ -114,7 +114,8 @@ type SourceFileMeta struct {
 	RealSize    int64
 	Rows        int64
 
-	ParquetMeta ParquetFileMeta // only for parquet
+	// ParquetMeta stores meta used only for parquet
+	ParquetMeta ParquetFileMeta
 }
 
 // NewMDTableMeta creates an Mydumper table meta with specified character set.
diff --git a/pkg/lightning/mydump/parquet_parser.go b/pkg/lightning/mydump/parquet_parser.go
index f82bfadb6742a..d2d4b0e26d0f3 100644
--- a/pkg/lightning/mydump/parquet_parser.go
+++ b/pkg/lightning/mydump/parquet_parser.go
@@ -560,7 +560,7 @@ func NewParquetParser(
 	var allocator memory.Allocator
 	allocator = memory.NewGoAllocator()
 	if meta.MemoryPool != nil {
-		allocator = NewAppendOnlyAllocator(meta.MemoryPool)
+		allocator = NewAppendOnlyAllocator(meta.MemoryPool, meta.MemoryUsage)
 	}
 
 	prop := parquet.NewReaderProperties(allocator)
@@ -648,7 +648,7 @@ func SampleStatisticsFromParquet(
 	}
 
 	meta := GetDefaultParquetMeta()
-	meta.MemoryPool = GetPool(2 << 30) // use up to 2GB memory for sampling
+	meta.MemoryPool = GetPool(10 << 30) // use up to 10GiB memory for sampling
 	parser, err := NewParquetParser(ctx, store, r, fileMeta.Path, meta)
 	if err != nil {