Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions src/ramcore/SamParser.cxx
Original file line number Diff line number Diff line change
@@ -1,9 +1,33 @@
#include "ramcore/SamParser.h"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'ramcore/SamParser.h' file not found [clang-diagnostic-error]

#include "ramcore/SamParser.h"
         ^

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lukedev45 please take a look here and fix the conflicts.

#include <cstring>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>

namespace ramcore {

static bool StrictAtoi(const char* token, const char* field_name, int& result) {
char* end = nullptr;
errno = 0;
long val = strtol(token, &end, 10);

if (end == token || *end != '\0') {
std::cerr << "SamParser: invalid value '" << token
<< "' for field " << field_name
<< " (not a number)\n";
return false;
}
if (errno == ERANGE || val < INT_MIN || val > INT_MAX) {
std::cerr << "SamParser: value '" << token
<< "' for field " << field_name
<< " is out of range\n";
return false;
}
result = static_cast<int>(val);
return true;
}

void StripCRLF(char* str) {
size_t len = strlen(str);
while (len > 0 && (str[len-1] == '\n' || str[len-1] == '\r')) {
Expand Down Expand Up @@ -65,14 +89,14 @@ bool SamParser::ParseLine(char* line, SamRecord& record) {
while (token) {
switch (field_num) {
case 0: record.qname = token; break;
case 1: record.flag = atoi(token); break;
case 1: if (!StrictAtoi(token, "FLAG", record.flag)) return false; break;
case 2: record.rname = token; break;
case 3: record.pos = atoi(token); break;
case 4: record.mapq = atoi(token); break;
case 3: if (!StrictAtoi(token, "POS", record.pos)) return false; break;
case 4: if (!StrictAtoi(token, "MAPQ", record.mapq)) return false; break;
case 5: record.cigar = token; break;
case 6: record.rnext = token; break;
case 7: record.pnext = atoi(token); break;
case 8: record.tlen = atoi(token); break;
case 7: if (!StrictAtoi(token, "PNEXT", record.pnext)) return false; break;
case 8: if (!StrictAtoi(token, "TLEN", record.tlen)) return false; break;
case 9: record.seq = token; break;
case 10:
StripCRLF(token);
Expand Down
38 changes: 37 additions & 1 deletion test/ramcoretests.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -462,4 +462,40 @@ TEST_F(ramcoreTest, QUALEncodingDecodingModes)

std::remove(samFile);
std::remove(ramFile);
}
}

TEST_F(ramcoreTest, SamParserRejectsNonNumericFields)
{
const char *badSam = "test_bad_fields.sam";
const char *rntupleFile = "test_bad_fields.root";

{
std::ofstream sam(badSam);
sam << "@HD\tVN:1.6\tSO:unsorted\n";
sam << "@SQ\tSN:chr1\tLN:10000\n";
// Non-numeric FLAG
sam << "read1\tBROKEN\tchr1\t100\t60\t50M\t*\t0\t0\t"
<< std::string(50, 'A') << "\t*\n";
// Non-numeric POS
sam << "read2\t0\tchr1\tBAD\t60\t50M\t*\t0\t0\t"
<< std::string(50, 'A') << "\t*\n";
// Trailing characters in MAPQ
sam << "read4\t0\tchr1\t300\t60abc\t50M\t*\t0\t0\t"
<< std::string(50, 'A') << "\t*\n";
// Out-of-range POS
sam << "read5\t0\tchr1\t99999999999999\t60\t50M\t*\t0\t0\t"
<< std::string(50, 'A') << "\t*\n";
// Valid record with negative TLEN
sam << "read3\t0\tchr1\t200\t60\t50M\t*\t0\t-150\t"
<< std::string(50, 'A') << "\t*\n";
}

samtoramntuple(badSam, rntupleFile, false, false, false, 505, 0);

auto reader = ROOT::RNTupleReader::Open("RAM", rntupleFile);
EXPECT_EQ(reader->GetNEntries(), 1)
<< "Only the valid record should be written; corrupted records must be rejected";

std::remove(badSam);
std::remove(rntupleFile);
}