Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
2af5690
add chaos test
GMHDBJD Sep 22, 2022
341e0e7
fix fmt
GMHDBJD Sep 26, 2022
deb7b1e
update
GMHDBJD Sep 27, 2022
6106ab6
update
GMHDBJD Sep 27, 2022
2b07319
update
GMHDBJD Sep 27, 2022
e55555f
update
GMHDBJD Sep 27, 2022
5bd247d
debug ci
GMHDBJD Sep 27, 2022
bc5c12a
update kind config
GMHDBJD Sep 27, 2022
162edf5
Revert "debug ci"
GMHDBJD Sep 27, 2022
003cf68
debug: reorder
GMHDBJD Sep 27, 2022
4051995
debug: describe
GMHDBJD Sep 27, 2022
8fb5f11
debug: see log
GMHDBJD Sep 27, 2022
d24468b
debug: remove worker
GMHDBJD Sep 27, 2022
05b26ea
debug: upgrade
GMHDBJD Sep 28, 2022
fa0c1c9
debug: upgrade kind
GMHDBJD Sep 28, 2022
3f21e45
debug: upgrade chaos mesh
GMHDBJD Sep 28, 2022
998cb91
debug: remove master
GMHDBJD Sep 28, 2022
e1f45f0
debug: use kind v1.4.0
GMHDBJD Sep 28, 2022
9d9cf1d
debug: fix
GMHDBJD Sep 28, 2022
5c9aea6
update dm chaos mesh
GMHDBJD Sep 28, 2022
9d466f5
trigger
GMHDBJD Sep 28, 2022
b59202a
add debug log
GMHDBJD Sep 28, 2022
12b125f
add more debug log
GMHDBJD Sep 28, 2022
f5c0f3a
add more debug log
GMHDBJD Sep 28, 2022
90054b9
debug: add more log
GMHDBJD Sep 29, 2022
f63bf6c
debug: add more time
GMHDBJD Sep 29, 2022
c401498
debug: add more log
GMHDBJD Sep 29, 2022
b77335d
fix
GMHDBJD Sep 29, 2022
c52e221
Merge branch 'master' into addChaos
GMHDBJD Sep 29, 2022
c846530
change log level
GMHDBJD Sep 30, 2022
9cd33ab
update
GMHDBJD Oct 9, 2022
79a6c43
update
GMHDBJD Oct 10, 2022
8124aa0
update
GMHDBJD Oct 10, 2022
74f0596
update
GMHDBJD Oct 10, 2022
0c3b2b2
update
GMHDBJD Oct 10, 2022
ff6b001
increase diff time
GMHDBJD Oct 10, 2022
c6a0751
Merge branch 'master' into addChaos
GMHDBJD Oct 11, 2022
25384dd
no need extStorage if sync is not fresh
GMHDBJD Oct 11, 2022
5f1cdb8
Merge branch 'fixSyncExtStorage' into addChaos
GMHDBJD Oct 11, 2022
eceb9c6
genFullData before createJob
GMHDBJD Oct 11, 2022
14ea080
wait source at first
GMHDBJD Oct 11, 2022
32a74d2
wait dm enter sync stage
GMHDBJD Oct 11, 2022
33ea64b
Merge branch 'master' into fixSyncExtStorage
GMHDBJD Oct 11, 2022
78df0b1
fix no key
GMHDBJD Oct 11, 2022
d330c95
fix deadline exceeded
GMHDBJD Oct 11, 2022
95d1bf1
Merge branch 'master' into fixSyncExtStorage
GMHDBJD Oct 11, 2022
2bbc179
Merge branch 'master' into addChaos
GMHDBJD Oct 12, 2022
a508ac4
address comment
GMHDBJD Oct 12, 2022
82df15c
Merge branch 'fixSyncExtStorage' into addChaos
GMHDBJD Oct 12, 2022
a26d807
remove triger
GMHDBJD Oct 12, 2022
0e7a276
Merge branch 'master' into addChaos
GMHDBJD Oct 12, 2022
7f2517a
fix lint
GMHDBJD Oct 12, 2022
ee37202
address comment
GMHDBJD Oct 12, 2022
5898535
Revert "remove triger"
GMHDBJD Oct 12, 2022
782eca6
retry create job
GMHDBJD Oct 13, 2022
b7c78bc
change log level
GMHDBJD Oct 13, 2022
4998d7a
remove trigger
GMHDBJD Oct 13, 2022
9c22288
Merge branch 'master' into addChaos
ti-chi-bot Oct 13, 2022
502d362
Merge remote-tracking branch 'upstream/master' into addChaos
GMHDBJD Oct 13, 2022
8c316a2
Merge branch 'master' into addChaos
ti-chi-bot Oct 14, 2022
6a38d07
Merge branch 'master' into addChaos
ti-chi-bot Oct 14, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions .github/workflows/dataflow_engine_chaos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
base:
# The type of runner that the job will run on
runs-on: ubuntu-20.04
timeout-minutes: 30
timeout-minutes: 50
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
key: ${{ runner.os }}-dataflow-${{ hashFiles('go.sum') }}

- name: Create k8s Kind Cluster
uses: helm/kind-action@v1.2.0
uses: helm/kind-action@v1.4.0
with:
cluster_name: dataflow-engine-cluster
config: ${{ github.workspace }}/engine/chaos/manifests/kind-cluster.yaml
Expand All @@ -71,7 +71,9 @@ jobs:
helm version

- name: Build dataflow engine binary
run: make tiflow tiflow-chaos-case
run: |
make tiflow tiflow-chaos-case
cp -r $GITHUB_WORKSPACE/engine/chaos/cases/conf/ $GITHUB_WORKSPACE/bin/engine-conf

- name: Build Dataflow engine docker image
run: |
Expand All @@ -82,6 +84,61 @@ jobs:
run: |
kind load docker-image dataflow:chaos --name dataflow-engine-cluster

# Set up upstream instances
- name: Set up sources
run: |
kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/sources.yaml
- name: Wait for sources ready # wait on each pod by name because `kubectl wait --all` does not work here
run: |
kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=300s || true
kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=300s || true
kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=300s || true
sleep 10
echo show pvc
kubectl get pvc -l app=sources -o wide
echo show pv
kubectl get pv -o wide
echo show svc
kubectl get svc -l app=sources -o wide
echo show sts
kubectl get sts -l app=sources -o wide
echo show po
kubectl get po -l app=sources -o wide
echo describe po
kubectl describe po -l app=sources
echo describe pvc
kubectl describe pvc -l app=sources
kubectl wait --for=condition=Ready pod/mysql57-0 --timeout=0s
kubectl wait --for=condition=Ready pod/mysql8-0 --timeout=0s
kubectl wait --for=condition=Ready pod/mariadb-0 --timeout=0s

# Set up downstream TiDB instance (deploy a TiDB with mockTiKV, not a TidbCluster managed by TiDB-operator)
- name: Set up TiDB
run: |
kubectl apply -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
kubectl get -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
kubectl describe -f $GITHUB_WORKSPACE/dm/chaos/manifests/tidb.yaml
- name: Wait for TiDB ready
run: |
kubectl wait --for=condition=Ready pod/tidb-0 --timeout=10m || true
echo show pvc
kubectl get pvc -l app=tidb -o wide
echo show pv
kubectl get pv -o wide
echo show svc
kubectl get svc -l app=tidb -o wide
echo show sts
kubectl get sts -l app=tidb -o wide
echo show po
kubectl get po -l app=tidb -o wide
echo describe po
kubectl describe po -l app=tidb
echo describe pvc
kubectl describe pvc -l app=tidb
kubectl wait --for=condition=Ready pod/tidb-0 --timeout=0s

# Set up metastore and basic services
- name: Set up metastore and basic services
run: |
Expand Down Expand Up @@ -196,6 +253,17 @@ jobs:
kubectl describe -f $GITHUB_WORKSPACE/engine/chaos/manifests/cases.yaml
kubectl get pods

# FIXME: remove this step once https://github.com/pingcap/tiflow/issues/7304 is fixed
- name: Wait DM enter sync stage
run: |
for idx in $(seq 0 300); do
echo "wait dm enter sync stage"
if kubectl logs job.batch/chaos-test-case | grep "full mode of the task has completed" ; then
break
fi
sleep 1
done

- name: Encode chaos-mesh action
run: |
echo CFG_BASE64=$(base64 -w 0 $GITHUB_WORKSPACE/engine/chaos/manifests/${{ matrix.chaos-obj }}.yaml) >> $GITHUB_ENV
Expand All @@ -204,7 +272,6 @@ jobs:
uses: chaos-mesh/chaos-mesh-action@master
env:
CFG_BASE64: ${{ env.CFG_BASE64 }}
CHAOS_MESH_VERSION: v1.0.0

# Check whether the test case has completed: up to 20 checks, 1 minute apart.
- name: Wait for chaos test case complete
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/dm_chaos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
base:
# The type of runner that the job will run on
runs-on: ubuntu-20.04
timeout-minutes: 30
timeout-minutes: 50
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -68,7 +68,7 @@ jobs:
key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }}

- name: Create k8s Kind Cluster
uses: helm/kind-action@v1.2.0
uses: helm/kind-action@v1.4.0

- name: Print cluster information
run: |
Expand Down Expand Up @@ -247,7 +247,6 @@ jobs:
uses: chaos-mesh/chaos-mesh-action@master
env:
CFG_BASE64: ${{ env.CFG_BASE64 }}
CHAOS_MESH_VERSION: v1.0.0

# Check whether the test case has completed: up to 20 checks, 1 minute apart.
- name: Wait for chaos test case complete
Expand Down
29 changes: 16 additions & 13 deletions dm/chaos/manifests/io-chaos-dm.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
apiVersion: chaos-mesh.org/v1alpha1
kind: IoChaos
kind: Schedule
metadata:
name: io-delay-dm
labels:
app: io-delay-dm
spec:
action: latency
mode: one
selector:
pods:
default: # default namespace
schedule: '@every 2m'
type: IOChaos
historyLimit: 5
concurrencyPolicy: Forbid
ioChaos:
action: latency
mode: one
selector:
pods:
default:
- dm-master-0
- dm-master-1
- dm-master-2
- dm-worker-0
- dm-worker-1
- dm-worker-2
- dm-worker-3
volumePath: /data
path: "/data/**/*"
delay: "100ms"
percent: 50
duration: "60s"
scheduler:
cron: "@every 2m"
volumePath: /data
path: /data/**/*
delay: 100ms
percent: 50
duration: 60s
112 changes: 15 additions & 97 deletions dm/chaos/manifests/network-emulation-dm.yaml
Original file line number Diff line number Diff line change
@@ -1,110 +1,28 @@
---
# A Network Loss action causes network packets to drop randomly
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
kind: Schedule
metadata:
name: network-loss-dm
labels:
app: network-loss-dm
spec:
action: loss
mode: one
selector:
pods:
default: # default namespace
schedule: 2-59/5 * * * *
type: NetworkChaos
historyLimit: 5
concurrencyPolicy: Forbid
networkChaos:
action: loss
mode: one
selector:
pods:
default:
- dm-master-0
- dm-master-1
- dm-master-2
- dm-worker-0
- dm-worker-1
- dm-worker-2
- dm-worker-3
loss:
loss: "25"
correlation: "25"
duration: "30s"
scheduler:
cron: "2-59/5 * * * *" # At every 5th minute from 2 through 59, (2, 7, 12, ...)


# A Network Delay action causes delays in message sending
---
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
name: network-delay-dm
labels:
app: network-delay-dm
spec:
action: delay
mode: one
selector:
pods:
default: # default namespace
- dm-master-0
- dm-master-1
- dm-master-2
- dm-worker-0
- dm-worker-1
- dm-worker-2
delay:
latency: "90ms"
correlation: "25"
jitter: "90ms"
duration: "30s"
scheduler:
cron: "3-59/5 * * * *" # At every 5th minute from 3 through 59, (3, 8, 13, ...)

---
# A Network Duplicate action causes packet duplication
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
name: network-duplicate-dm
labels:
app: network-duplicate-dm
spec:
action: duplicate
mode: one
selector:
pods:
default: # default namespace
- dm-master-0
- dm-master-1
- dm-master-2
- dm-worker-0
- dm-worker-1
- dm-worker-2
duplicate:
duplicate: "40"
correlation: "25"
duration: "30s"
scheduler:
cron: "4-59/5 * * * *" # At every 5th minute from 4 through 59, (4, 9, 14, ...)

---
# A Network Corrupt action causes packet corruption
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
name: network-corrupt-dm
labels:
app: network-corrupt-dm
spec:
action: corrupt
mode: one
selector:
pods:
default: # default namespace
- dm-master-0
- dm-master-1
- dm-master-2
- dm-worker-0
- dm-worker-1
- dm-worker-2
corrupt:
corrupt: "40"
correlation: "25"
duration: "30s"
scheduler:
cron: "5-59/5 * * * *" # At every 5th minute from 5 through 59, (5, 10, 15, ...)
loss:
loss: "25"
correlation: "25"
duration: 30s
53 changes: 15 additions & 38 deletions dm/chaos/manifests/network-partition-dm.yaml
Original file line number Diff line number Diff line change
@@ -1,47 +1,24 @@
---
# network partition between DM-worker and DM-master
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
kind: Schedule
metadata:
name: network-partition-dm-worker-master
labels:
app: network-partition-dm-worker-master
spec:
action: partition
mode: one
selector:
labelSelectors:
"app": "dm-worker"
direction: both
target:
selector:
labelSelectors:
"app": "dm-master"
schedule: 2-59/3 * * * *
type: NetworkChaos
historyLimit: 5
concurrencyPolicy: Forbid
networkChaos:
action: partition
mode: one
duration: "30s"
scheduler:
cron: "2-59/3 * * * *" # At every 3rd minute from 2 through 59, (2, 5, 8, ...)

---
# network partition between DM-master members
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
name: network-partition-dm-master-master
labels:
app: network-partition-dm-master-master
spec:
action: partition
mode: one
selector:
labelSelectors:
"app": "dm-master"
direction: both
target:
selector:
labelSelectors:
"app": "dm-master"
mode: one
duration: "30s"
scheduler:
cron: "3-59/3 * * * *" # At every 3rd minute from 3 through 59, (3, 6, 9, ...)
app: dm-worker
direction: both
target:
selector:
labelSelectors:
app: dm-master
mode: one
duration: 30s
Loading