diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py
new file mode 100644
index 0000000..98c9e6a
--- /dev/null
+++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py
@@ -0,0 +1,32 @@
+"""Add get_other_papers flag to myads table
+
+Revision ID: 717c2970ff42
+Revises: af63c0205b19
+Create Date: 2025-07-15 16:14:53.689180
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '717c2970ff42'
+down_revision = 'af63c0205b19'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+from sqlalchemy.dialects import postgresql
+
+def upgrade():
+    #with app.app_context() as c:
+    #    db.session.add(Model())
+    #    db.session.commit()
+
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('true'), nullable=False))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('myads', 'get_other_papers')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py b/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py
index b3b2d0d..d5a24e4 100644
--- a/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py
+++ b/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py
@@ -1,12 +1,14 @@
 """Adding library_id foreign key to user
+
 Revision ID: af63c0205b19
-Revises: dcda14f51cff
+Revises: 4cc89f2f896b
 Create Date: 2025-07-09 15:00:40.189587
 
 """
 
 # revision identifiers, used by Alembic.
 revision = 'af63c0205b19'
-down_revision = 'dcda14f51cff'
+down_revision = '4cc89f2f896b'
+
 from alembic import op
 import sqlalchemy as sa
@@ -30,7 +32,6 @@ def upgrade():
 
     # Delete link_server from user_data
     # But save the link_server in the library table instead to be accessible via users.library_id
-
     users = bind.execute("SELECT id, user_data FROM users WHERE user_data ? 'link_server'")
     for user_id, user_data in users:
diff --git a/vault_service/models.py b/vault_service/models.py
index c19d866..b3abd32 100644
--- a/vault_service/models.py
+++ b/vault_service/models.py
@@ -81,6 +81,7 @@ class MyADS(Base):
     name = Column(String)
     active = Column(Boolean)
     scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False)
+    get_other_papers = Column(Boolean, nullable=False, server_default=sa.text('true'), default=True)
     stateful = Column(Boolean)
     frequency = Column(myads_frequency)
     template = Column(myads_template, nullable=True)
diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py
index dcf1de4..d943729 100644
--- a/vault_service/tests/test_user.py
+++ b/vault_service/tests/test_user.py
@@ -973,7 +973,7 @@ def test_scixplorer_referrer_updates_all_notifications(self):
         self.assertTrue(notification1.scix_ui)
         self.assertTrue(notification2.scix_ui)
         self.assertTrue(notification3.scix_ui)
-        
+
         # Create a fourth notification WITHOUT Scixplorer Host (query type)
         r = self.client.post(
             url_for('user.myads_notifications'),
@@ -1005,6 +1005,289 @@ def test_scixplorer_referrer_updates_all_notifications(self):
         self.assertTrue(notification3.scix_ui)
         self.assertTrue(notification4.scix_ui)
 
+    @httpretty.activate
+    def test_get_other_papers_flag_creation(self):
+        '''Tests creation of arXiv daily notifications with get_other_papers flag'''
+        httpretty.register_uri(
+            httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'),
+            content_type='application/json',
+            status=200,
+            body="""{
+            "responseHeader":{
+            "status":0, "QTime":0,
"params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}}, + "response":{"numFound":10456930,"start":0,"docs":[ + { "bibcode":"2005JGRC..110.4002G" }, + { "bibcode":"2005JGRC..110.4003N" }, + { "bibcode":"2005JGRC..110.4004Y" }]}}""") + + # Test 1: Create arXiv daily notification with default get_other_papers=True + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'dark matter', + 'classes': ['astro-ph.CO'], + 'frequency': 'daily'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + self.assertEqual(r.json['frequency'], 'daily') + self.assertEqual(r.json['get_other_papers'], True) # Should default to True + myads_id_default = r.json['id'] + + # Test 2: Create arXiv daily notification with explicit get_other_papers=False + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'black holes', + 'classes': ['astro-ph.HE'], + 'frequency': 'daily', + 'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + self.assertEqual(r.json['frequency'], 'daily') + self.assertEqual(r.json['get_other_papers'], False) + myads_id_false = r.json['id'] + + # Test 3: Create arXiv weekly notification (should not include get_other_papers in response) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'galaxies', + 'classes': ['astro-ph.GA'], + 'frequency': 'weekly'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + 
self.assertEqual(r.json['frequency'], 'weekly') + self.assertNotIn('get_other_papers', r.json) # Should not be included for weekly + + # Test 4: Create non-arxiv template (should not include get_other_papers) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'keyword', + 'data': 'machine learning'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'keyword') + self.assertNotIn('get_other_papers', r.json) # Should not be included for keyword template + + @httpretty.activate + def test_get_other_papers_flag_editing(self): + '''Tests editing the get_other_papers flag for arXiv daily notifications''' + httpretty.register_uri( + httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'), + content_type='application/json', + status=200, + body="""{ + "responseHeader":{ + "status":0, "QTime":0, + "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}}, + "response":{"numFound":10456930,"start":0,"docs":[ + { "bibcode":"2005JGRC..110.4002G" }]}}""") + + # Create an arXiv daily notification + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'neutron stars', + 'classes': ['astro-ph.HE'], + 'frequency': 'daily'}), + content_type='application/json') + + self.assertStatus(r, 200) + myads_id = r.json['id'] + self.assertEqual(r.json['get_other_papers'], True) # Default + + # Edit to enable get_other_papers + r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': True}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['get_other_papers'], True) + + # Edit to disable get_other_papers + r 
= self.client.put(url_for('user.myads_notifications', myads_id=myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['get_other_papers'], False) + + # Test editing a weekly arXiv notification (should not affect get_other_papers) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'cosmology', + 'classes': ['astro-ph.CO'], + 'frequency': 'weekly'}), + content_type='application/json') + + self.assertStatus(r, 200) + weekly_myads_id = r.json['id'] + + # Try to edit get_other_papers on weekly notification (should be ignored) + r = self.client.put(url_for('user.myads_notifications', myads_id=weekly_myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertNotIn('get_other_papers', r.json) # Should not be included for weekly + + def test_get_other_papers_query_generation(self): + '''Tests that _create_myads_query generates correct queries based on get_other_papers flag''' + from vault_service.views.user import _create_myads_query + + # Test 1: Daily arXiv with keywords, get_other_papers=True (should create 2 queries) + queries = _create_myads_query('arxiv', 'daily', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=True) + + self.assertEqual(len(queries), 2, "Should create 2 queries when get_other_papers=True") + + # First query: keyword matches + self.assertIn('dark matter', queries[0]['q']) + self.assertNotIn(' NOT ', queries[0]['q']) + self.assertEqual(queries[0]['sort'], 'score desc, date desc') + + # Second query: other recent papers (NOT keyword matches) + self.assertIn('dark matter', queries[1]['q']) + self.assertIn(' NOT ', 
queries[1]['q']) + self.assertEqual(queries[1]['sort'], 'date desc') + + # Test 2: Daily arXiv with keywords, get_other_papers=False (should create 1 query) + queries = _create_myads_query('arxiv', 'daily', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=False) + + self.assertEqual(len(queries), 1, "Should create 1 query when get_other_papers=False") + + # Only query: keyword matches + self.assertIn('dark matter', queries[0]['q']) + self.assertNotIn(' NOT ', queries[0]['q']) + self.assertEqual(queries[0]['sort'], 'score desc, date desc') + + # Test 3: Weekly arXiv (get_other_papers should not affect) + queries_weekly_true = _create_myads_query('arxiv', 'weekly', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=True) + queries_weekly_false = _create_myads_query('arxiv', 'weekly', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=False) + + self.assertEqual(len(queries_weekly_true), 1, "Weekly should always create 1 query") + self.assertEqual(len(queries_weekly_false), 1, "Weekly should always create 1 query") + self.assertEqual(queries_weekly_true[0]['q'], queries_weekly_false[0]['q']) + + # Test 4: Non-arXiv template (get_other_papers should not affect) + queries_keyword = _create_myads_query('keyword', 'weekly', 'machine learning', + get_other_papers=False) + self.assertEqual(len(queries_keyword), 3, "Keyword template should create 3 queries (recent, trending, useful)") + + def test_get_myads_includes_get_other_papers_flag(self): + '''Tests that get_myads endpoint includes get_other_papers flag appropriately''' + with self.app.session_scope() as session: + # Create test data directly in database + user = User(id=102) + session.add(user) + session.commit() + + # Daily arXiv notification with get_other_papers=True + myads_daily_true = MyADS( + user_id=102, + type='template', + name='Daily True', + template='arxiv', + frequency='daily', + active=True, + stateful=False, + classes=['astro-ph.CO'], + data='dark matter', + 
get_other_papers=True + ) + session.add(myads_daily_true) + + # Daily arXiv notification with get_other_papers=False + myads_daily_false = MyADS( + user_id=102, + type='template', + name='Daily False', + template='arxiv', + frequency='daily', + active=True, + stateful=False, + classes=['astro-ph.HE'], + data='black holes', + get_other_papers=False + ) + session.add(myads_daily_false) + + # Weekly arXiv notification (should not include get_other_papers) + myads_weekly = MyADS( + user_id=102, + type='template', + name='Weekly', + template='arxiv', + frequency='weekly', + active=True, + stateful=False, + classes=['astro-ph.GA'], + data='galaxies', + get_other_papers=False # Not relevant for weekly + ) + session.add(myads_weekly) + + # Keyword notification (should not include get_other_papers) + myads_keyword = MyADS( + user_id=102, + type='template', + name='Keyword', + template='keyword', + frequency='weekly', + active=True, + stateful=False, + data='machine learning', + get_other_papers=False # Not relevant for keyword + ) + session.add(myads_keyword) + + session.commit() + + # Get all myADS setups for user 102 + r = self.client.get(url_for('user.get_myads', user_id='102'), + headers={'Authorization': 'secret'}) + + self.assertStatus(r, 200) + self.assertEqual(len(r.json), 4) + + # # Find each notification and check get_other_papers field + daily_true = next(n for n in r.json if n['name'] == 'Daily True') + daily_false = next(n for n in r.json if n['name'] == 'Daily False') + weekly = next(n for n in r.json if n['name'] == 'Weekly') + keyword = next(n for n in r.json if n['name'] == 'Keyword') + + # # Daily arXiv notifications should include get_other_papers + self.assertIn('get_other_papers', daily_true) + self.assertEqual(daily_true['get_other_papers'], True) + + self.assertIn('get_other_papers', daily_false) + self.assertEqual(daily_false['get_other_papers'], False) + + # # Weekly arXiv and keyword notifications should NOT include get_other_papers + 
self.assertNotIn('get_other_papers', weekly) + self.assertNotIn('get_other_papers', keyword) + def test_library_integration(self): '''Tests library integration with user data storage''' @@ -1239,4 +1522,4 @@ def test_library_data_migration_logic(self): self.assertEqual(user.user_data['new_setting'], 'value') if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/vault_service/views/user.py b/vault_service/views/user.py index 35370ed..8929899 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -305,6 +305,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): return json.dumps({'msg': 'No notification type passed'}), 400 scix_ui_header = urlparse.urlparse(request.referrer).netloc in current_app.config.get("NECTAR_REFERRERS", ["dev.scixplorer.org"]) + get_other_papers = True with current_app.session_scope() as session: try: @@ -368,6 +369,8 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): data = payload.get('data', None) stateful = False frequency = payload.get('frequency', 'daily') + if frequency == 'daily': + get_other_papers = payload.get('get_other_papers', True) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: @@ -405,6 +408,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): template=template, classes=classes, scix_ui=scix_ui_header, + get_other_papers=get_other_papers, data=data) else: return json.dumps({'msg': 'Bad data passed; type must be query or template'}), 400 @@ -447,6 +451,9 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return 
json.dumps(output), 200 @@ -556,6 +563,10 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= setup.stateful = payload.get('stateful', setup.stateful) setup.frequency = payload.get('frequency', setup.frequency) + # Only update get_other_papers for daily arXiv notifications and if the payload has a value + if setup.template == 'arxiv' and setup.frequency == 'daily' and payload.get('get_other_papers', None) is not None: + setup.get_other_papers = payload.get('get_other_papers') + try: session.begin_nested() except exc.StatementError as e: @@ -583,6 +594,10 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -616,7 +631,7 @@ def execute_myads_query(myads_id): data = setup.data if data is None and setup.query_id: data = _get_general_query_data(session, setup.query_id) - query = _create_myads_query(setup.template, setup.frequency, data, classes=setup.classes) + query = _create_myads_query(setup.template, setup.frequency, data, classes=setup.classes, get_other_papers=setup.get_other_papers) return json.dumps(query) @@ -636,12 +651,13 @@ def _get_general_query_data(session, query_id): data = urlparse.parse_qs(query) return data -def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None): +def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None, get_other_papers=True): """ Creates a query based on the stored myADS setup (for templated queries only) :param frequency: daily or weekly :param data: keywords or other stored query template data :param classes: arXiv classes, only required for arXiv template queries + :param get_other_papers: for 
arXiv daily queries, whether to include "other recent papers" query :return: out: list of dicts; constructed query, dates are such that it's meant to be run today: [{q: query params, sort: sort string}] @@ -678,9 +694,14 @@ def _create_myads_query(template_type, frequency, data, classes=None, start_isod classes = 'arxiv_class:(' + ' OR '.join([x + '.*' if '.' not in x else x for x in tmp]) + ')' keywords = data if frequency == 'daily': - connector = [' ', ' NOT '] - # keyword search should be sorted by score, "other recent" should be sorted by bibcode - sort_w_keywords = ['score desc, date desc', 'date desc'] + if get_other_papers: + connector = [' ', ' NOT '] + # keyword search should be sorted by score, "other recent" should be sorted by bibcode + sort_w_keywords = ['score desc, date desc', 'date desc'] + else: + # Only include keyword matches, skip "other recent papers" + connector = [' '] + sort_w_keywords = ['score desc, date desc'] elif frequency == 'weekly': connector = [' '] sort_w_keywords = ['score desc, date desc'] @@ -791,14 +812,17 @@ def get_myads(user_id, start_isodate=None): query = None else: data = _get_general_query_data(session, s.query_id) - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate, get_other_papers=s.get_other_papers) else: qid = None data = s.data - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) + if s.template == 'arxiv' and s.frequency == 'daily': + o['get_other_papers'] = s.get_other_papers + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate, get_other_papers=s.get_other_papers) o['qid'] = qid o['query'] = query + output.append(o)