From 87da3bf782cf72a698abcc66b9f137c7b6eeac0d Mon Sep 17 00:00:00 2001 From: mttk Date: Wed, 6 Mar 2019 11:41:08 +0100 Subject: [PATCH] fix arxiv url extract --- sotawhat/sotawhat.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sotawhat/sotawhat.py b/sotawhat/sotawhat.py index 8c9d6f1..4e90f97 100644 --- a/sotawhat/sotawhat.py +++ b/sotawhat/sotawhat.py @@ -50,10 +50,18 @@ def get_next_result(lines, start): """ result = {} - idx = lines[start + 3][10:].find('"') - result['main_page'] = lines[start + 3][9:10 + idx] - idx = lines[start + 4][23:].find('"') - result['pdf'] = lines[start + 4][22: 23 + idx] + '.pdf' + + # these can change with arxiv updates + abstract_line = 2 + abstract_begin_offset = 47 + + pdf_line = 3 + pdf_begin_offset = 22 + + idx = lines[start + abstract_line][abstract_begin_offset:].find('"') + result['main_page'] = lines[start + abstract_line][abstract_begin_offset:abstract_begin_offset + idx] + idx = lines[start + pdf_line][pdf_begin_offset:].find('"') + result['pdf'] = lines[start + pdf_line][pdf_begin_offset: pdf_begin_offset + idx] + '.pdf' start += 4