diff --git a/.packit.yaml b/.packit.yaml index af4eb3f..cdaa1e5 100644 --- a/.packit.yaml +++ b/.packit.yaml @@ -10,12 +10,35 @@ files_to_sync: # name in upstream package repository/registry (e.g. in PyPI) upstream_package_name: catdoc -# doesn't work: upstream_tag_template: v{%version} +upstream_tag_template: v{version} # downstream (Fedora) RPM package name downstream_package_name: catdoc +actions: + # Without this, packit infers the version from the last git tag (v0.96) and + # overwrites Version: in the spec. This reads the version from configure.ac. + get-current-version: + - autoconf --trace='AC_INIT:$2' configure.ac + create-archive: + - autoreconf -fiv + - ./configure + - make dist + # packit uses the last line of output as the archive path + - bash -c "ls catdoc-*.tar.gz" + +srpm_build_deps: + - autoconf + - automake + - make + - gcc + jobs: - job: copr_build trigger: pull_request targets: [fedora-stable] +- job: copr_build + trigger: release + targets: [fedora-stable] + owner: skierpage + project: catdoc diff --git a/Makefile.am b/Makefile.am index 0e79201..017fba9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,10 @@ SUBDIRS = src tests +man_MANS = doc/catdoc.1 doc/catppt.1 doc/xls2csv.1 +if BUILD_WORDVIEW +man_MANS += doc/wordview.1 +endif + EXTRA_DIST = charsets doc/catdoc.1.in doc/catppt.1.in doc/xls2csv.1.in if BUILD_WORDVIEW EXTRA_DIST += doc/wordview.1.in diff --git a/NEWS b/NEWS index eb4bc4e..913c94c 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ - 0.97 development pre-release ?? 2025 + 0.97 March 8 2026 Remove 16-bit DOS support. Use version 0.96 if you're still building and running the catdoc programs in DOS. Incorporated Debian patches for the three 2024 CVEs identified and @@ -12,12 +12,15 @@ results. Add `autoconf -fiv` to INSTALL steps. Users can set a CHARSETPATH environment variable to point to one or more non-standard charsets directory locations (as an alternative - to charset_path in .catdocrc). + to charset_path in .catdocrc). 
This allows testing without doing + `make install`. Fixed handling of destination charsets with chars > u7FFF (e.g. when specifying mac-roman as destination charset), and of missing charsets; commit 8866ca937 (cherry-picked from vbwagner's upstream GitHub repository). Also added test for the bug. + The other vbwagner commit in 2025, "Fix ole name bug", was already + incorporated in this fork's version 0.96. Developed test framework for memory access errors caught by address sanitizer. @@ -35,6 +38,7 @@ Dean Pierce in 2015 using American Fuzzy Lop software fuzzer, reported to the oss-sec mailing list and uploaded to https://catdocbugs.neocities.org/ . + Added build automation to trigger Fedora Copr builds. 0.96 July 7 2025 This incorporates several patches to catdoc that have diff --git a/README.md b/README.md index ae28ad3..e7385b2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# catdoc version 0.97 in development +# catdoc version 0.97 `catdoc` is a program which reads MS-Office Word `.doc` files and prints their content as readable ASCII text to stdout. It can also produce correct @@ -25,17 +25,21 @@ the text of old MS-Office files. user-definable output formats and support for Word97 files, which contain UNICODE internally. -## version 0.97 in development +## version 0.97 -This in-development next release of the catdoc programs incorporates the Debian -patches for the vulnerabilities -[CVE-2024-54028](https://nvd.nist.gov/vuln/detail/CVE-2024-54028), -[CVE-2024-52035](https://nvd.nist.gov/vuln/detail/CVE-2024-52035), -and -[CVE-2024-48877](https://nvd.nist.gov/vuln/detail/CVE-2024-48877) -identified and addressed by the Cisco Talos team. -The patched source code _no longer compiles_ in Borland Turbo C. -So v0.96 is likely the last release of the catdoc programs that build and run +This release of the catdoc programs addresses numerous vulnerabilities +described below. 
To do so it has updated autoconf/automake tooling to make it +easier to build with Address Sanitizer, and added an automake test harness to check +for memory errors. The steps to build it from source changed slightly; see +[INSTALL](INSTALL). + +[vbwagner's upstream](https://github.com/vbwagner/catdoc) came back to life after a 9-year absence with a couple +of fixes in November 2025; this fork incorporates them. + +### End of DOS support + +The patched source code _no longer compiles_ in Borland Turbo C; +v0.96 is the last release of the catdoc programs that builds and runs in 16-bit DOS. If anyone cares about DOS support, get in touch! ## File format specifications @@ -63,8 +67,18 @@ The catdoc programs are unsafe C code that parse old files. Unexpected or garbled file content will cause them to crash and running them on a specially-crafted file may allow an attacker to interfere with the operation of your computer. Version 0.97 fixes several memory access errors and Common -Vulnerabilities and Exposures search the commit history for "CVE") but there -may be more. Some were detected by Address Sanitizer tools, see +Vulnerabilities and Exposures reported against various forks and distribution +packages of catdoc over the years, but there may be more. + +This release of the catdoc programs incorporates the Debian patches for the +vulnerabilities +[CVE-2024-54028](https://nvd.nist.gov/vuln/detail/CVE-2024-54028), +[CVE-2024-52035](https://nvd.nist.gov/vuln/detail/CVE-2024-52035), +and +[CVE-2024-48877](https://nvd.nist.gov/vuln/detail/CVE-2024-48877) +identified and addressed by the Cisco Talos team. +See [NEWS](NEWS) and the commit history (search history for "CVE") for other +fixes made. Some were detected by Address Sanitizer tools, see [tests/asan_failures](tests/asan_failures) for more details. ## Documentation, bugs, more information @@ -77,7 +91,7 @@ those who don't have man command (such as MS-DOS users), plain text and PostScript versions of the man pages are in the doc directory. 
Your bug reports and suggestions are welcome, as are code contributions; -[TODO](TODO) is an incomplete list of things to work on. In particular, if +[TODO.md](TODO.md) is an incomplete list of things to work on. In particular, if you have old MS-Office files from which the catdoc text extraction programs do not produce correct output, please file an issue and attach a small test file. diff --git a/TODO.md b/TODO.md index 2a88106..e192a50 100644 --- a/TODO.md +++ b/TODO.md @@ -1,7 +1,5 @@ # TODO -- [x] Fix "Cannod read", "chatset", "platfom", and other misspellings - ## Test cleanup - [ ] Maybe fix the memory leaks reported by asan so tests don't have to set ASAN_OPTIONS=detect_leaks=0 @@ -16,10 +14,7 @@ ## Other CI issues -- [x] Use full GNU autoconf to create Makefiles. - This will make it easier to build with Address Sanitizer (`-{f,l}asan`) to reproduce some of the above CVEs. -- [x] .github/workflows/c-cpp.yml can be simplified and may not need `make install -- [ ] Check if copr build (using.packit.yaml) of Fedora RPM works. +- [ ] Check if [Fedora copr build](https://copr.fedorainfracloud.org/coprs/skierpage/catdoc/) (using [.packit.yaml](.packit.yaml)) of Fedora RPM works. ## Research: find more Office 2007 test files diff --git a/catdoc.spec b/catdoc.spec index 5447351..21f32a4 100644 --- a/catdoc.spec +++ b/catdoc.spec @@ -1,10 +1,10 @@ Name: catdoc -Version: v0.97_devel +Version: 0.97 Release: %autorelease Summary: programs which extract text from Microsoft Office 97-2004 files License: GPL-2.0-or-later URL: https://github.com/skierpage/catdoc -Source0: https://github.com/skierpage/%{name}/archive/refs/tags/v%{version}.tar.gz +Source0: catdoc-0.97.tar.gz BuildRequires: gcc BuildRequires: make BuildRequires: tk @@ -38,7 +38,7 @@ wordview is a program that displays Microsoft Word files in a graphical window via Tk. 
%prep -%autosetup -p1 +%autosetup -p1 -n %{name}-%{version} %build %configure diff --git a/configure b/configure index 8ba7de1..f97b56b 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.72 for catdoc 0.97_devel. +# Generated by GNU Autoconf 2.72 for catdoc 0.97. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, @@ -601,8 +601,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='catdoc' PACKAGE_TARNAME='catdoc' -PACKAGE_VERSION='0.97_devel' -PACKAGE_STRING='catdoc 0.97_devel' +PACKAGE_VERSION='0.97' +PACKAGE_STRING='catdoc 0.97' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1313,7 +1313,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -'configure' configures catdoc 0.97_devel to adapt to many kinds of systems. +'configure' configures catdoc 0.97 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1380,7 +1380,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of catdoc 0.97_devel:";; + short | recursive ) echo "Configuration of catdoc 0.97:";; esac cat <<\_ACEOF @@ -1490,7 +1490,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -catdoc configure 0.97_devel +catdoc configure 0.97 generated by GNU Autoconf 2.72 Copyright (C) 2023 Free Software Foundation, Inc. @@ -1758,7 +1758,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by catdoc $as_me 0.97_devel, which was +It was created by catdoc $as_me 0.97, which was generated by GNU Autoconf 2.72. 
Invocation command line was $ $0$ac_configure_args_raw @@ -3236,7 +3236,7 @@ fi # Define the identity of the package. PACKAGE='catdoc' - VERSION='0.97_devel' + VERSION='0.97' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -6061,7 +6061,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by catdoc $as_me 0.97_devel, which was +This file was extended by catdoc $as_me 0.97, which was generated by GNU Autoconf 2.72. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6129,7 +6129,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -catdoc config.status 0.97_devel +catdoc config.status 0.97 configured by $0, generated by GNU Autoconf 2.72, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index c65cbe9..7c70e18 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT([catdoc],[0.97_devel]) +AC_INIT([catdoc],[0.97]) AC_CONFIG_AUX_DIR([build-aux]) AM_INIT_AUTOMAKE([-Wall foreign])