Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions scan_explorer_service/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,82 @@ def test_article_collection(self):
self.assertStatus(r, 200)
self.assertEqual(data, {'id': 'journalvolume', 'selected_page': 100})

def test_put_collection_with_articles(self):
    """put_collection bulk-inserts articles and links them to pages."""
    # Single page carrying a single article; journal/volume mirror the
    # fixture collection so the endpoint targets the same volume.
    page_payload = {
        'name': 'pageA',
        'color_type': 'BW',
        'page_type': 'Normal',
        'label': '1',
        'width': 100,
        'height': 100,
        'volume_running_page_num': 1,
        'articles': [{'bibcode': '2000ApJ...001..001A'}],
    }
    payload = {
        'type': 'type',
        'journal': self.collection.journal,
        'volume': self.collection.volume,
        'pages': [page_payload],
    }

    response = self.client.put(url_for("metadata.put_collection"), json=payload)
    self.assertStatus(response, 200)

    new_collection_id = response.get_json()['id']
    session = self.app.db.session

    # Exactly one article row should exist for the new collection.
    stored_articles = session.query(Article).filter(
        Article.collection_id == new_collection_id).all()
    self.assertEqual(len(stored_articles), 1)
    self.assertEqual(stored_articles[0].bibcode, '2000ApJ...001..001A')

    # And exactly one page row.
    stored_pages = session.query(Page).filter(
        Page.collection_id == new_collection_id).all()
    self.assertEqual(len(stored_pages), 1)

def test_put_collection_deduplicates_articles(self):
    """An article appearing in multiple pages is inserted only once."""
    def build_page(name, label, running_num):
        # Two pages differ only in identity fields; both reference the
        # same bibcode so the endpoint must deduplicate the article.
        return {
            'name': name,
            'color_type': 'BW',
            'page_type': 'Normal',
            'label': label,
            'width': 100,
            'height': 100,
            'volume_running_page_num': running_num,
            'articles': [{'bibcode': '2000ApJ...001..001A'}],
        }

    payload = {
        'type': 'type',
        'journal': self.collection.journal,
        'volume': self.collection.volume,
        'pages': [
            build_page('pageA', '1', 1),
            build_page('pageB', '2', 2),
        ],
    }

    response = self.client.put(url_for("metadata.put_collection"), json=payload)
    self.assertStatus(response, 200)

    new_collection_id = response.get_json()['id']
    session = self.app.db.session

    # The shared bibcode must be stored exactly once for the collection.
    stored_articles = session.query(Article).filter(
        Article.collection_id == new_collection_id).all()
    self.assertEqual(len(stored_articles), 1)

    # Both pages must have been persisted.
    stored_pages = session.query(Page).filter(
        Page.collection_id == new_collection_id).all()
    self.assertEqual(len(stored_pages), 2)

    # Each page still links to the (single) article: two association rows.
    from scan_explorer_service.models import page_article_association_table as pat
    link_rows = session.execute(
        pat.select().where(pat.c.page_id.in_([page.id for page in stored_pages]))
    ).fetchall()
    self.assertEqual(len(link_rows), 2)


# Allow running this test module directly (e.g. `python test_metadata.py`)
# in addition to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()
57 changes: 48 additions & 9 deletions scan_explorer_service/views/metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import datetime, timezone
from typing import Union
from flask import Blueprint, current_app, jsonify, request
from scan_explorer_service.utils.db_utils import article_get_or_create, article_overwrite, collection_overwrite, page_get_or_create, page_overwrite
from scan_explorer_service.models import Article, Collection, Page
from scan_explorer_service.models import Article, Collection, Page, page_article_association_table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from flask_discoverer import advertise
from scan_explorer_service.utils.search_utils import *
from scan_explorer_service.views.view_utils import ApiErrors
Expand Down Expand Up @@ -76,16 +78,53 @@ def put_collection():
try:
collection = Collection(**json)
collection_overwrite(session, collection)

for page_json in json.get('pages', []):
page_json['collection_id'] = collection.id
page = page_get_or_create(session, **page_json)

for article_json in page_json.get('articles', []):
article_json['collection_id'] = collection.id
page.articles.append(article_get_or_create(session, **article_json))
now = datetime.now(timezone.utc).replace(tzinfo=None)
pages_data = []
articles_data = {}
page_article_data = []

session.add(page)
for page_json in json.get('pages', []):
page_json['collection_id'] = collection.id
articles = page_json.pop('articles', [])
page = Page(**page_json)
pages_data.append({
'id': page.id,
'name': page.name,
'label': page.label,
'format': page.format,
'color_type': page.color_type,
'page_type': page.page_type,
'width': page.width,
'height': page.height,
'collection_id': page.collection_id,
'volume_running_page_num': page.volume_running_page_num,
'created': now,
'updated': now,
})
for article_json in articles:
bibcode = article_json['bibcode']
if bibcode not in articles_data:
articles_data[bibcode] = {
'id': bibcode,
'bibcode': bibcode,
'collection_id': collection.id,
'created': now,
'updated': now,
}
page_article_data.append({
'page_id': page.id,
'article_id': bibcode,
})

if pages_data:
session.bulk_insert_mappings(Page, pages_data)
if articles_data:
session.execute(
pg_insert(Article.__table__).values(list(articles_data.values())).on_conflict_do_nothing()
)
if page_article_data:
session.execute(page_article_association_table.insert(), page_article_data)
session.commit()

return jsonify({'id': collection.id}), 200
Expand Down