
Commit 74eb00b

Modified Python and JS code to handle extranumerical lines and alternative readings
1 parent ab2be77 commit 74eb00b

14 files changed: 380 additions & 68 deletions

src/.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -2,6 +2,7 @@
 d3_practice.md
 network_practice.md
 sankey_test.md
+sandbox.md
 components/sankey.js
 components/sankey_original.js
 components
```

src/about.md

Lines changed: 44 additions & 9 deletions
```diff
@@ -96,6 +96,8 @@ While the project overall is strictly concerned with Latin poetry, Greek texts a
 
 Finally, each `word-level intertext` records at least one scholarly source (sometimes the original publication proposing the intertext, and sometimes a commentary), which are collectively stored in a `publication` table. (It is also possible to record an ancient work as the scholarly source, since occasionally the explicit recognition of an intertext goes back to a grammarian of antiquity.) This information is not currently displayed in any fashion, but it will eventually be shown when a passage is selected.
 
+Some additional information about particulars of the database and project can be found on the [Frequently Asked Questions page](./faq).
+
 ### Data Pipeline
 
 *Non-coders may wish to [skip this part](#visualizations)!*
```
```diff
@@ -161,7 +163,7 @@ def table_to_df(table, cols_dict):
 
 The data loader then joins the disparate metrical data into a single dataframe and then returns it to a single restructured JSON object; and it converts each of the other dataframes to a JSON object, which are collectively stored in an array. These are all saved to files that are automatically committed to GitHub.
 
-The same Python data loader also creates network nodes and edges from the data in order to enable visualization of the intertexts as [Sankey diagrams](https://en.wikipedia.org/wiki/Sankey_diagram). (I chose these over traditional [network graphs](https://guides.library.yale.edu/dh/graphs) since the sequential nature of an intertextual network makes it well-suited to visualizing as a flow-path.) While part of the network creation is done automatically by the d3 Sankey module, the initial preparation of nodes and edges is performed in the data loader; further filtering, when necessary, is done on the fly based on the user's selections.
+The same Python data loader also creates network nodes and edges from the data in order to enable visualization of the intertexts as [Sankey diagrams](https://en.wikipedia.org/wiki/Sankey_diagram). (I chose these over traditional [network graphs](https://guides.library.yale.edu/dh/graphs) since the sequential nature of an intertextual network makes it well-suited to visualizing as a flow-path.) While part of the network creation is done automatically by the d3 Sankey module, the initial preparation of nodes and edges is performed in the data loader; further filtering, when necessary, is done on the fly based on the user’s selections.
 
 <p><details>
 <summary>Click to view the two custom functions for this stage.</summary>
```
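The export step described in the prose above (join the metrical dataframes, restructure them as JSON records, write to files) can be sketched roughly as follows. This is illustrative only: the dataframe names and columns here are hypothetical stand-ins, not the loader's actual variables.

```python
import json
import pandas as pd

# Hypothetical stand-ins for the loader's dataframes (illustrative names).
meter_df = pd.DataFrame({"word_id": ["w1", "w2"], "foot": [1, 2]})
lemma_df = pd.DataFrame({"word_id": ["w1", "w2"], "lemma": ["arma", "uirum"]})

# Join the disparate data into a single dataframe on a shared key,
# then restructure it as a JSON array of records.
joined = meter_df.merge(lemma_df, on="word_id")
records = joined.to_dict(orient="records")

# The records list can then be dumped to a file for the frontend to consume.
json_text = json.dumps(records)
```

`to_dict(orient="records")` is the usual pandas idiom for producing one JSON object per row, which matches the "array of JSON objects" shape the prose describes.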
```diff
@@ -451,12 +453,28 @@ for (let meter in meters) {
 
 // Define grid height based on number of lines.
 
-const gridY = (lineRange.lastLine - lineRange.firstLine) + 1; // I may need to modify this to accomodate passages with extra lines
+let gridYInterim = (lineRange.lastLine - lineRange.firstLine) + 1;
+let extraLineSet;
+
+// make a set of any extranumerical lines
+
+if (wordsFiltered.filter(word => word.line_num_modifier).length > 0) {
+  extraLineSet = new Set(
+    wordsFiltered.filter(word => word.line_num_modifier)
+      .map(word => ({lineNum: word.line_num, lineNumMod: word.line_num_modifier, lineNumString: `${word.line_num}${word.line_num_modifier}`}))
+  );
+  gridYInterim += extraLineSet.size; // if there are extranumerical lines, increase the height multiplier accordingly, so that cells remain square
+}
+
+const extraLines = extraLineSet ? Array.from(extraLineSet) : [];
+
+const gridY = gridYInterim;
 
 const cellSize = 20;
 const gridHeight = gridY * cellSize;
 const gridWidth = gridX * cellSize;
 
+
 // Create plot, conditional on the existence of intertexts
 
 // set tick range; increase step every ten (max) intertexts
```
```diff
@@ -469,6 +487,18 @@ else {
 };
 let tickRange = d3.range(Math.min(...intxtCnts), Math.max(...intxtCnts)+1, step);
 
+let lineVals = d3.range(lineRange.firstLine, lineRange.lastLine +1);
+
+// if there are extranumerical lines, insert them into the line values array
+
+for (let line of extraLines) {
+  let insertAfter = line.lineNum;
+  let insertAfterIndex = lineVals.indexOf(insertAfter) + 1;
+  let insertString = line.lineNumString;
+  lineVals.splice(insertAfterIndex, 0, insertString);
+}
+
+
 const plotDisplay = intertextsArr.every(intxt => intxt.intxtCnt === 0) ? null : Plot.plot({
   grid: true,
   x: {
```
```diff
@@ -479,7 +509,8 @@ const plotDisplay = intertextsArr.every(intxt => intxt.intxtCnt === 0) ? null :
   },
   y: {
     label: 'Line',
-    domain: d3.range(lineRange.firstLine, lineRange.lastLine +1),
+    // domain: d3.range(lineRange.firstLine, lineRange.lastLine +1),
+    domain: lineVals,
     tickSize: 0,
   },
   color: {scheme: "Greens",
```
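The splice logic in the JS above (insert a label like `845a` directly after its base line in the y-axis domain) can be sketched language-agnostically. Here is a Python analogue with illustrative values; the real code operates on d3 ranges and the `wordsFiltered` array.

```python
# Insert extranumerical line labels (e.g. "845a") into the ordered list of
# line numbers, immediately after their base line, mirroring the JS splice.
first_line, last_line = 843, 847
extra_lines = [{"lineNum": 845, "lineNumString": "845a"}]  # illustrative data

line_vals = list(range(first_line, last_line + 1))
for line in extra_lines:
    # find the base line, then insert the modified label right after it
    insert_after_index = line_vals.index(line["lineNum"]) + 1
    line_vals.insert(insert_after_index, line["lineNumString"])
# line_vals is now [843, 844, 845, "845a", 846, 847]
```

Using the full ordered label list as the axis domain (rather than a numeric range) is what lets the extranumerical rows render as their own grid cells.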
```diff
@@ -550,24 +581,28 @@ if (plotCurrSelect) {
 
 currWordId = plotCurrSelect.wordObj.obj_id; // set current word ID to the selected word
 
+let intertextsTableExtended = intertextsTable.concat(intertextsModTable);
+
 // create functions for getting a word's immediate ancestors or descendants
 function getWordAncestors(currWordId){
-  for (let i in intertextsTable) {
-    let intxt = intertextsTable[i];
+  for (let i in intertextsTableExtended) {
+    let intxt = intertextsTableExtended[i];
     // for each intertext in the intertexts table, if its target ID matches the focus word (either the selected word or one of its ancestors), add it to the list of ancestor intertexts and add its source to the list of words to be processed.
     if (currWordId === intxt.target_word_id) {
       ancestorIntertexts.push(intxt);
       ancestorWordIDs.push(intxt.source_word_id);
+      wordSankeyIntxtIDs.push(intxt.intxt_id);
     }
   }
 }
 function getWordDescendants(currWordId){
-  for (let i in intertextsTable) {
-    let intxt = intertextsTable[i];
+  for (let i in intertextsTableExtended) {
+    let intxt = intertextsTableExtended[i];
     // for each intertext in the intertexts table, if its source ID matches the focus word (either the selected word or one of its descendants), add it to the list of descendant intertexts and add its target to the list of words to be processed.
     if (currWordId === intxt.source_word_id) {
       descendantIntertexts.push(intxt);
       descendantWordIDs.push(intxt.target_word_id);
+      wordSankeyIntxtIDs.push(intxt.intxt_id);
     }
   }
 }
```
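The traversal pattern in `getWordAncestors` above is a simple edge-list scan: collect every intertext whose target is the focus word. A minimal Python sketch of the same idea, with hypothetical IDs (the real data comes from the concatenated original and modified intertext tables):

```python
# Illustrative edge list standing in for intertextsTableExtended.
intertexts_extended = [
    {"intxt_id": "i1", "source_word_id": "w1", "target_word_id": "w2"},
    {"intxt_id": "i2", "source_word_id": "w2", "target_word_id": "w3"},
]

def get_word_ancestors(curr_word_id, table):
    """Collect edges pointing at curr_word_id, plus their source-word IDs."""
    ancestor_intertexts, ancestor_word_ids, sankey_intxt_ids = [], [], []
    for intxt in table:
        if curr_word_id == intxt["target_word_id"]:
            ancestor_intertexts.append(intxt)
            ancestor_word_ids.append(intxt["source_word_id"])
            sankey_intxt_ids.append(intxt["intxt_id"])
    return ancestor_intertexts, ancestor_word_ids, sankey_intxt_ids

ancestors, ancestor_ids, sankey_ids = get_word_ancestors("w3", intertexts_extended)
```

In the actual code the returned source-word IDs are fed back in as new focus words, so repeated application walks the whole ancestor chain.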
```diff
@@ -623,9 +658,9 @@ The colors (which distinguish between authors in the passage-level and full inte
 
 ## Next Steps
 
-In addition to continuing database input, the code needs to be tweaked in order to handle extranumerical lines (such as 845a, which would come between 845 and 846) and alternate readings.
+The main focus for the near future is on entering additional intertexts into the database. Once sufficient intertexts have been entered, work can begin on the creation of analytical tools, enabling researchers to ask and answer questions about the data.
 
-Beyond those crucial improvements, a few additional potential long-term developments are:
+A few additional potential long-term developments are:
 
 - an option to view only direct intertext density
 - an option to view &ldquo;descendant&rdquo; intertexts instead of &ldquo;ancestor&rdquo; intertexts in the density display
```

src/data/intxt_network_graph.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/data/intxts_full.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/data/intxts_full_modified.json

Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+[{"intxt_grp_id": "21049513", "intxt_id": "21049515", "source_word_id": "21049507", "target_word_id": "21049502", "source_author_id": "20336404", "source_work_id": "20336405", "source_work_seg_id": "20336406", "source_line_num": "718", "target_author_id": "20215016", "target_work_id": "20215018", "target_work_seg_id": "20238543", "target_line_num": 829, "match_type_ids": ["20215033", "20240810"], "original_id": "21049508", "original_grp_id": "21049513"}]
```

src/data/model_json_backup.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/data/nodegoat_data.json.py

Lines changed: 91 additions & 18 deletions
```diff
@@ -5,6 +5,7 @@
 import requests
 import pandas as pd
 import networkx as nx
+import copy
 
 # set parameters
 api_token = os.getenv("NODEGOAT_API_TOKEN")
```
```diff
@@ -155,6 +156,18 @@ def get_object_ids(model):
                 authorship_prob_class_table = objtype["objects"]
                 tables_dict["authorship_prob_class_table"] = authorship_prob_class_table
                 break
+            elif objtype["objects"][id_num]["object"]["type_id"] == 23064:
+                textual_prob_table = objtype["objects"]
+                tables_dict["textual_prob_table"] = textual_prob_table
+                break
+            elif objtype["objects"][id_num]["object"]["type_id"] == 23065:
+                alternate_reading_table = objtype["objects"]
+                tables_dict["alternate_reading_table"] = alternate_reading_table
+                break
+            elif objtype["objects"][id_num]["object"]["type_id"] == 23066:
+                word_lvl_intxt_mod_table = objtype["objects"]
+                tables_dict["word_lvl_intxt_mod_table"] = word_lvl_intxt_mod_table
+                break
             else:
                 pass
 # end of inner for loop
```
```diff
@@ -272,7 +285,7 @@ def remove_decimal(id_string):
     "max_length": {"67537": "objval"},
     #"unit_line": {"68127": "objval"}
 }
-### The rest aren't necessary for the actual visualization ###
+### The next three aren't necessary for the actual visualization ###
 publication_cols = {"author_ids": {"67416": "refid"},
     "publication_date": {"67417": "objval"},
     "article_chapter_title": {"67418": "objval"},
```
```diff
@@ -286,6 +299,26 @@ def remove_decimal(id_string):
     "PID": {"67431": "objval"},
     # in future, may add latitude and longitude from sub-object, but that would require additional logic
 }
+###
+textual_prob_cols = {
+    "work_segment_id": {"71932": "refid"},
+    "line_num": {"71933": "objval"},
+    "line_num_modifier": {"71934": "objval"},
+    "start_pos_id": {"71935": "refid"},
+    "stop_pos_id": {"71936": "refid"}
+}
+alternate_reading_cols = {
+    "textual_prob_id": {"71937": "refid"},
+    "word_inst_ids": {"71938": "refid"},
+    "default_reading": {"71939": "objval"}
+}
+wd_lvl_intxt_mod_cols = {
+    "wd_lvl_intxt_id": {"71940": "refid"},
+    "wd_to_replace_id": {"71941": "refid"},
+    "wd_sub_id": {"71942": "refid"},
+    "match_type_remove_ids": {"71943": "refid"},
+    "match_type_add_ids": {"71944": "refid"}
+}
 
 # Convert tables to dataframes based on specified columns
 word_instance_df = table_to_df(word_instance_table, wd_inst_cols)
```
```diff
@@ -303,6 +336,9 @@ def remove_decimal(id_string):
 scholar_df = table_to_df(scholar_table,scholar_cols)
 publication_df = table_to_df(publication_table,publication_cols)
 pleiades_df = table_to_df(pleiades_table,pleiades_cols)
+textual_prob_df = table_to_df(textual_prob_table,textual_prob_cols)
+alternate_reading_df = table_to_df(alternate_reading_table,alternate_reading_cols)
+word_lvl_intxt_mod_df = table_to_df(word_lvl_intxt_mod_table,wd_lvl_intxt_mod_cols)
 
 # For `word instance` df, make sure that elided_monosyllable is either False or True, not None:
 word_instance_df['elided_monosyllable'] = word_instance_df['elided_monosyllable'].apply(lambda x: False if x is None else x)
```
```diff
@@ -437,19 +473,20 @@ def remove_decimal(id_string):
     tables_df_to_dict[df_name] = new_dict
 
 sources_table = []
-for obj_id in word_lvl_intxt_table:
-    intxt_sources = word_lvl_intxt_table[obj_id]['object']['object_sources']
-    if isinstance(intxt_sources, dict):
-        for source_type_id in intxt_sources.keys():
-            for source in intxt_sources[source_type_id]:
-                sources_dict = {}
-                sources_dict['obj_id'] = obj_id
-                sources_dict['source_type_id'] = source_type_id
-                source_id = source['object_source_ref_object_id']
-                sources_dict['source_id'] = str(source_id)
-                source_location = source['object_source_link']
-                sources_dict['source_location'] = source_location
-                sources_table.append(sources_dict)
+for table in [word_lvl_intxt_table, word_lvl_intxt_mod_table]:
+    for obj_id in table:
+        intxt_sources = table[obj_id]['object']['object_sources']
+        if isinstance(intxt_sources, dict):
+            for source_type_id in intxt_sources.keys():
+                for source in intxt_sources[source_type_id]:
+                    sources_dict = {}
+                    sources_dict['obj_id'] = obj_id
+                    sources_dict['source_type_id'] = source_type_id
+                    source_id = source['object_source_ref_object_id']
+                    sources_dict['source_id'] = str(source_id)
+                    source_location = source['object_source_link']
+                    sources_dict['source_location'] = source_location
+                    sources_table.append(sources_dict)
 # else:
 #     sources_dict = {'obj_id': obj_id, 'source_type_id': None, 'source_id': None}
 #     sources_table.append(sources_dict)
```
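The rewritten loop above applies the same source-flattening logic to both the original and the modifier intertext tables by iterating over a list of tables. A condensed sketch of the pattern (the table entries here are illustrative; the real nodegoat objects carry many more fields):

```python
# Illustrative stand-ins for the two nodegoat tables.
word_lvl_intxt_table = {
    "100": {"object": {"object_sources": {"7": [
        {"object_source_ref_object_id": 55, "object_source_link": None}]}}},
}
word_lvl_intxt_mod_table = {
    "200": {"object": {"object_sources": "none"}},  # non-dict entries are skipped
}

sources_table = []
for table in [word_lvl_intxt_table, word_lvl_intxt_mod_table]:
    for obj_id, entry in table.items():
        intxt_sources = entry["object"]["object_sources"]
        if isinstance(intxt_sources, dict):  # only flatten structured sources
            for source_type_id, sources in intxt_sources.items():
                for source in sources:
                    sources_table.append({
                        "obj_id": obj_id,
                        "source_type_id": source_type_id,
                        "source_id": str(source["object_source_ref_object_id"]),
                        "source_location": source["object_source_link"],
                    })
```

Looping over `[table_a, table_b]` keeps one copy of the flattening logic; the alternative of duplicating the loop body per table is what the commit replaces.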
```diff
@@ -470,14 +507,13 @@ def remove_decimal(id_string):
 def build_intxt_dict(intxt_ids):
     for intxt in intxt_ids:
         intxt_id = str(intxt)
-        for row2 in word_lvl_intxt_df[word_lvl_intxt_df.obj_id == intxt_id].iterrows():
+        for i, row2 in word_lvl_intxt_df[word_lvl_intxt_df.obj_id == intxt_id].iterrows():
             row_dict = {}
             if intxt_id in grp_intxts_list:
                 row_dict["intxt_grp_id"] = intxt_grp_id
             else:
                 row_dict["intxt_grp_id"] = None
             row_dict["intxt_id"] = intxt_id
-            row2 = row2[1]
             source_id = row2.source_word_id
             target_id = row2.target_word_id
             if isinstance(row2.match_type_ids, list):
```
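The change from `for row2 in df.iterrows()` to `for i, row2 in df.iterrows()` matters because `DataFrame.iterrows()` yields `(index, Series)` pairs; unpacking in the for-statement removes the need for the old `row2 = row2[1]` workaround. A quick illustration with a toy dataframe:

```python
import pandas as pd

df = pd.DataFrame({"obj_id": ["a", "b"], "val": [1, 2]})

# iterrows() yields (index, Series) tuples, so without unpacking
# each loop item is a tuple, not a row.
index, row = next(df.iterrows())

# Unpacking in the for-statement gives direct attribute access on each row.
ids = []
for i, row in df.iterrows():
    ids.append(row.obj_id)
```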
```diff
@@ -509,18 +545,55 @@ def build_intxt_dict(intxt_ids):
             row_dict["match_type_ids"] = match_type_ids
             intxt_grp_list.append(row_dict)
 
-for row in word_lvl_intxt_grp_df.iterrows():
-    row = row[1]
+for i, row in word_lvl_intxt_grp_df.iterrows():
     intxt_grp_id = row.obj_id
     intxt_ids = row.word_intxt_ids
     build_intxt_dict(intxt_ids)
+
+# do the same for intertexts not included in a group
 build_intxt_dict([intxt for intxt in word_lvl_intxt_df.obj_id if intxt not in grp_intxts_list])
 
 intxt_full_df = pd.DataFrame.from_dict(intxt_grp_list)
 
 with open(scriptdir+"/intxts_full.json", "w") as intxts_full:
     json.dump(intxt_grp_list, intxts_full)
 
+# make a list of full intertexts modified based on potential word substitutions due to alternate readings
+
+intxts_to_modify_df = intxt_full_df.query(f"intxt_id in {word_lvl_intxt_mod_df['wd_lvl_intxt_id'].to_list()}").copy().reset_index(drop=True)
+intxt_full_mod = []
+
+for i, row in intxts_to_modify_df.iterrows():  # take each original full intertext that needs to be modified
+    intxt_mod_subset = word_lvl_intxt_mod_df.query("wd_lvl_intxt_id == @row.intxt_id")  # get possible modifications for the current original intertext
+    for j, row2 in intxt_mod_subset.iterrows():
+        new_intxt_full = copy.deepcopy({key: val for key, val in row.items()})  # new deep copy dictionary of unmodified full intertext
+        new_intxt_full['intxt_id'] = row2['obj_id']
+        for st in ['source','target']:
+            if row[f'{st}_word_id'] == row2['wd_to_replace_id']:
+                new_intxt_full[f'{st}_word_id'] = row2['wd_sub_id']
+                new_word = word_instance_df.query(f"obj_id == '{row2['wd_sub_id']}'").reset_index(drop=True)
+                new_intxt_full[f'{st}_line_num'] = new_word.loc[0, 'line_num']
+                new_workseg = new_word.loc[0, "work_segment_id"]
+                if row[f'{st}_work_seg_id'] != new_workseg:
+                    new_intxt_full[f'{st}_work_seg_id'] = new_workseg
+                    new_work = work_seg_df.query("obj_id == @new_workseg").reset_index(drop=True).loc[0, "work_id"]
+                    new_intxt_full[f'{st}_work_id'] = new_work
+                    new_author = work_df.query("obj_id == @new_work").reset_index(drop=True).loc[0, "author_id"]
+                    new_intxt_full[f'{st}_author_id'] = new_author
+        for id in row2.match_type_remove_ids:
+            new_intxt_full['match_type_ids'].remove(id)
+        for id in row2.match_type_add_ids:
+            new_intxt_full['match_type_ids'].append(id)
+        new_intxt_full['original_id'] = row.intxt_id
+        new_intxt_full['original_grp_id'] = row.intxt_grp_id
+        if new_intxt_full['source_work_seg_id'] != row['source_work_seg_id'] or new_intxt_full['target_word_id'] != row['target_word_id'] and new_intxt_full['target_work_seg_id'] != row['target_work_seg_id']:
+            new_intxt_full['intxt_grp_id'] = None
+
+        intxt_full_mod.append(new_intxt_full)
+
+with open(scriptdir+"/intxts_full_modified.json", "w") as intxts_full_mod_file:
+    json.dump(intxt_full_mod, intxts_full_mod_file)
 
 ######### CREATE AND EXPORT NETWORK ####################
```
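The alternate-reading block above deep-copies each affected intertext record and swaps in the substituted word and match types. A condensed sketch of that substitution step, using the field names from the diff but entirely illustrative data (the real code also re-derives line number, work segment, work, and author from the dataframes):

```python
import copy

# Illustrative original intertext record and its modifier.
original = {"intxt_id": "i1", "source_word_id": "w1", "target_word_id": "w2",
            "match_type_ids": ["m1", "m2"]}
modifier = {"obj_id": "i1-mod", "wd_to_replace_id": "w2", "wd_sub_id": "w9",
            "match_type_remove_ids": ["m2"], "match_type_add_ids": ["m3"]}

# Deep copy so mutable fields like match_type_ids are not shared with the original.
modified = copy.deepcopy(original)
modified["intxt_id"] = modifier["obj_id"]

# Swap the substituted word in on whichever side it appears.
for st in ["source", "target"]:
    if modified[f"{st}_word_id"] == modifier["wd_to_replace_id"]:
        modified[f"{st}_word_id"] = modifier["wd_sub_id"]

# Adjust the match types, then record where the modified record came from.
for mt in modifier["match_type_remove_ids"]:
    modified["match_type_ids"].remove(mt)
for mt in modifier["match_type_add_ids"]:
    modified["match_type_ids"].append(mt)
modified["original_id"] = original["intxt_id"]
```

The deep copy is the important design choice: a shallow copy would share the `match_type_ids` list, so the `remove`/`append` calls would silently corrupt the unmodified record.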
src/data/nodegoat_tables.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/data/objects_json_backup.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/data/sankey_data.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.
