diff --git a/readme.md b/readme.md
index 41d7c31..00c1b16 100644
--- a/readme.md
+++ b/readme.md
@@ -46,11 +46,9 @@ Creating and downloading the spreadsheet:
Normalizing the spreadsheet:
-In the downloaded CSV, combine or split up the following fields:
+** The script should normalize all of the following fields itself! ** BUT, if you encounter any trouble, you can also manually normalize the CSV by combining or splitting up the following fields:
* Combine the columns 'Notes,' 'Alternative Title,' 'Credits' into a single column 'Notes'
* Combine the columns 'Medium of original,' 'Dimensions of original,' 'Original video standard,' 'Generation' columns into a single column 'Medium of original'
- * Normalize the 'frame rate' column into numbers only (e.g., remove the word 'fps')
- * Split the 'video size' column into 'Video height' and 'Video width'; only use numbers (e.g., turn '640x480' into '640' and '480')
* Add 'urn:bampfa_accession_number:' (no quotes) before each the accession number in the 'PFA full accession number' column
diff --git a/rs2ia.py b/rs2ia.py
index c41da9d..d75f786 100644
--- a/rs2ia.py
+++ b/rs2ia.py
@@ -119,6 +119,12 @@ def __init__(
self._user = _user
self.rsAPI = ResourceSpaceAPI(_user)
+ # initializing metadata attributes required/desired by IA that don't appear in RS
+ self.description = ""
+ self.source = ""
+ self.externalidentifier = ""
+ self.date = ""
+
def get_local_asset_path(self):
# see https://www.resourcespace.com/knowledge-base/api/get_resource_path
# construct parameters of API call as a string
@@ -215,35 +221,90 @@ def post_to_ia(self):
elif self.mediaType == 'mp3':
ia_mediatype = 'audio'
- md = {
- # LET'S THINK ABOUT HOW TO MAKE THIS SET OF MD MORE AGNOSTIC/GENERALIZABLE
+ '''
+ Normalizing columns from csv for use in metadata dict.
+ This is required because the RS and IA metadata fields do not directly
+ map to each other. Each field is initialized to "" so that the metadata
+ dict doesn't complain that it's missing; blank fields will be removed later.
+ '''
+ # concatenate 'description' fields
+ if self.assetMetadata['Notes'] : # if the metadata field exists (as a string), then add it to the dictionary value
+ self.description = "Notes: " + self.assetMetadata['Notes'] + ". "
+ if self.assetMetadata['Description'] :
+ self.description = "Description: " + self.assetMetadata['Description'] + ". "
+ if self.assetMetadata['Alternative Title'] :
+ self.description += "Alternative Title: " + self.assetMetadata['Alternative Title'] + ". "
+ if self.assetMetadata['Credits'] :
+ self.description += "Credits: " + self.assetMetadata['Credits']
+ # concatenate 'source' fields
+ if self.assetMetadata['Medium of original'] :
+ self.source = "Medium of original: " + self.assetMetadata['Medium of original'] + ". "
+ if self.assetMetadata['Dimensions of original'] :
+ self.source += "Dimensions of original: " + self.assetMetadata['Dimensions of original'] + ". "
+ # add 'urn:bampfa_accession_number:' to accession # (this conforms to IA style guide)
+ if self.assetMetadata['PFA full accession number'] :
+ self.externalidentifier = "urn:bampfa_accession_number:" + self.assetMetadata['PFA full accession number']
+ if self.assetMetadata['Date of recording'] :
+ self.date = self.assetMetadata['Date of recording']
+ elif self.assetMetadata['Release Date'] :
+ self.date = self.assetMetadata['Release Date']
+ # audio- and video-specific fields
+ if ia_mediatype == 'movies' :
+ if self.assetMetadata['Original video format'] :
+ self.source += "Original video format: " + self.assetMetadata['Original video format'] + ". "
+ if self.assetMetadata['Original video standard'] :
+ self.source += "Original video standard: " + self.assetMetadata['Original video standard'] + ". "
+ if self.assetMetadata['Generation'] :
+ self.source += "Generation: " + self.assetMetadata['Generation']
+ elif ia_mediatype == 'audio' :
+ if self.assetMetadata['PFA film series'] :
+ self.description = "Pacific Film Archive film series: " + self.assetMetadata['PFA film series'] + ". "
+ if self.assetMetadata['Event title'] :
+ self.description = "Event title: " + self.assetMetadata['Event title'] + ". "
+ if self.assetMetadata['Speaker/Interviewee'] :
+ self.description = "Speaker/Interviewee: " + self.assetMetadata['Speaker/Interviewee'] + ". "
+ if self.assetMetadata['Subject(s): Film title(s)'] :
+ self.description = "Subject(s): Film title(s): " + self.assetMetadata['Subject(s): Film title(s)'] + ". "
+ if self.assetMetadata['Subject(s): Topics(s)'] :
+ self.description = "Subject(s): Topics(s): " + self.assetMetadata['Subject(s): Topics(s)'] + ". "
+
+ # general MD dict
+ general_md = {
'collection': self.collection,
'collection': self.collection2, # this overrides the previous line
- 'rights': 'This is a rights statement',
+ 'rights': self.assetMetadata['Copyright statement'],
'mediatype': ia_mediatype,
#'licenseurl': self.license,
- 'creator': self.assetMetadata['Directors / Filmmakers'],
'contributor': self.assetMetadata['Resource type'],
'identifier': identifier,
+ 'external-identifier': self.externalidentifier,
'title': self.assetMetadata['Title'],
- 'date': self.assetMetadata['Release Date'],
- # Original columns 'Notes,' 'Alternative Title,' 'Credits' should be concatenated manually by operator into single column 'Notes'
- 'description': self.assetMetadata['Notes'],
- # Original columns 'Medium of original,' 'Dimensions of original,' 'Original video standard,' 'Generation' columns should be concatenated manually by operator into single column 'Medium of original'
- 'source': self.assetMetadata['Medium of original'],
- # 'frame rate' column should be normalized into numbers manually by operator
- 'frames_per_second': self.assetMetadata['Frame rate'],
- # 'video size' column should be split into 'Video height' and 'Video width' numbers manually by operator
- # 'source_pixel_width': self.assetMetadata['Video height'],
- # 'source_pixel_height': self.assetMetadata['Video width'],
- # 'PFA full accession number' column should be normalized to 'urn:bampfa_accession_number:XXXX' manually by operator
- 'external-identifier': self.assetMetadata['PFA full accession number'],
- 'condition': self.assetMetadata['Original Material Condition'],
+ 'date': self.date,
+ 'description': self.description,
+ 'source': self.source,
+ 'language': self.assetMetadata['Language']
+ }
+ movies_md = {
'sound': self.assetMetadata['PFA item sound characteristics'],
- 'color': self.assetMetadata['Color characteristics']
+ 'color': self.assetMetadata['Color characteristics'],
+ 'creator': self.assetMetadata['Directors / Filmmakers'],
}
+ # audio_md = {
+ # # need appropriate creator field for audio; will all audio be PFA lecture series?
+ # }
+
+ # concatenate dictionaries
+ if ia_mediatype == 'movies' :
+ md = dict(general_md)
+ md.update(movies_md)
+ elif ia_mediatype == 'audio' :
+ md = dict(general_md)
+
# get rid of empty values in the md dictionary
md = {k: v for k, v in md.items() if v not in (None,'')}
+ # remove line breaks that display as literal "
"
+ md = {v.replace('
', ' '): k
+ for k, v in md.items()}
# archive.org Python Library, 'uploading': https://archive.org/services/docs/api/internetarchive/quickstart.html#uploading
print("ACCESS COPY FILENAME:")
print(identifier)