Jeffrey C. Witt (Loyola University Maryland)
https://jeffreycwitt.com | jcwitt@loyola.edu
March 05, 2026, Austrian Academy of Sciences, Vienna, Austria
Slide Deck: http://jeffreycwitt.com/slides/2026-03-05-vienna
def _load_block_map(csv_path, target_column):
    """Return a ``{block id: target id}`` dict read from a block-map CSV.

    The CSV must contain a ``block`` column and a column named
    ``target_column``; any other columns are ignored. Leading and trailing
    whitespace is stripped from both columns before the dict is built.
    """
    table = pd.read_csv(csv_path)
    blocks = table["block"].str.strip()
    targets = table[target_column].str.strip()
    return dict(zip(blocks, targets))


def createDocsDict(
    block_text_dict,
    item_map_path="data/block-item-toplevel-map-2023-11-28.csv",
    article_map_path="data/aquinasSTandScriptumArticleBlocks-2023-11-28.csv",
):
    """Concatenate block (paragraph) texts into one document per item.

    Blocks whose id starts with ``"TAca84-"`` or ``"ta-"`` (Aquinas ST and
    Scriptum) are grouped by *article* instead of by item, using a separate
    map. Blocks that have no entry in the applicable map are skipped.

    Args:
        block_text_dict: Mapping of block id -> block text. Texts are joined
            in this mapping's iteration order, separated by a single space.
        item_map_path: CSV with ``block`` and ``item`` columns.
        article_map_path: CSV with ``block`` and ``article`` columns, covering
            the Aquinas ST and Scriptum blocks.

    Returns:
        Dict mapping each item id (or article id) to the concatenated text of
        the blocks that belong to it.
    """
    # Step 1 get map of all items and their descendant blocks
    block_item_dict = _load_block_map(item_map_path, "item")
    # Step 2 (Hack) get map of all articles in Aquinas ST and Scriptum and
    # their descendant blocks
    aablock_item_dict = _load_block_map(article_map_path, "article")

    # Step 3 construct the documents dictionary by appending every paragraph
    # to the document it belongs to
    documents_dict = {}
    for key, value in block_text_dict.items():
        # (Hack) Aquinas ST and Scriptum blocks get a document at the article
        # level; there is deliberately no fallback to the item-level map.
        if key.startswith(("TAca84-", "ta-")):
            docid = aablock_item_dict.get(key)
        else:
            docid = block_item_dict.get(key)

        # Unmapped blocks (or blocks mapped to an empty id) are ignored.
        if not docid:
            continue

        current_value = documents_dict.get(docid)
        if current_value:
            documents_dict[docid] = current_value + ' ' + value
        else:
            # First block for this document, or the text so far is empty:
            # start fresh so the document never begins with a stray space.
            documents_dict[docid] = value
    return documents_dict
# Gather the levels found within each top-level document.
mapping = collect_unique_levels()
# For every (top-level document, level) pair, save the key/value dictionary of
# doc ids and doc text at that level. docsDict ends up holding only the result
# of the last call.
for toplevel_key, levels in mapping.items():
    for level in levels:
        docsDict = saveToDiskFullDocList(toplevel_key, level=level)