T-F2

Berlin, Staatsbibliothek, MSS Simulata Orientalia 6

Digitization available via The Digitized Collections of the Staatsbibliothek zu Berlin HERE.

Photograph no. 101 (shown upside down online) is the recto, and photograph no. 89 is the verso.

show the code
# Load the xml2 package
library(xml2)

# Path to the XML file for T-F2 (relative to the working directory);
# passed to convert_xml_to_html() below.
path_xml <- "data/T-F2.xml"
# Convert an XML transcription into HTML and print it with cat().
#
# Every <l>, <cb> and <pb> found under <text> is visited in document order:
#   <l>   becomes one <div> per verse line; every 5th line (counted over all
#         <l> elements, including those outside the requested range) is
#         prefixed with a line-number <div>;
#   <pb>  becomes a folio-number <div> built from its "n" attribute;
#   <cb>  closes the current <div> and opens <div class='column-break'>.
#
# Arguments:
#   xml_file      Path to the XML file to convert.
#   version_name  "minimal", "intermediate" or "extensive". Controls how each
#                 <choice> is collapsed to plain text:
#                   minimal       text of its <orig>;
#                   intermediate  if it has both <abbr> and <expan>, the text
#                                 of <expan>/<intermediate> when present, else
#                                 of <expan>/<reg>; otherwise text of <orig>;
#                   extensive     text of its <reg> (all <orig> are dropped).
#   min_line,     Optional inclusive bounds on the running <l> count. NULL
#   max_line      leaves that side of the range open.
#
# Value: whatever cat() returns; the function is called for its printed
# output. Stops with an error if xml_file does not exist.
convert_xml_to_html <- function(xml_file, 
                                version_name = c("minimal", "intermediate", "extensive"),
                                min_line = NULL, 
                                max_line = NULL) {
    
    version_name <- match.arg(version_name)
    
    # Fail early with a readable message when the path is wrong.
    if (!file.exists(xml_file)) {
        stop("XML file not found: ", xml_file, call. = FALSE)
    }
    
    # Read the whole file as one string, then mark whitespace that sits
    # between a closing tag and the next tag with a <SPACE/> placeholder so it
    # can be turned back into a real space when the line text is extracted
    # (presumably the parser would otherwise discard such blank nodes).
    raw_xml <- readChar(xml_file, file.info(xml_file)$size)
    processed_xml <- gsub("(</[^>]+>)\\s+(<[^>]*>)", "\\1<SPACE/>\\2", raw_xml)
    doc <- read_xml(processed_xml)
    
    nodes <- xml_find_all(doc, "//text//l | //text//cb | //text//pb")
    
    # Open-ended bounds become +/-Inf so one comparison covers every case.
    lower <- if (is.null(min_line)) -Inf else min_line
    upper <- if (is.null(max_line)) Inf else max_line
    
    # --- small helpers ------------------------------------------------------
    
    # TRUE when xml_find_first() found nothing.
    is_missing <- function(node) {
        inherits(node, "xml_missing")
    }
    
    # Text of a node, or "" when the node was not found (instead of NA, which
    # would end up in the output as the literal word "NA").
    node_text <- function(node) {
        if (is_missing(node)) "" else xml_text(node)
    }
    
    # Attribute value, or "" when the attribute is absent (instead of "NA").
    attr_or_empty <- function(node, attr) {
        value <- xml_attr(node, attr)
        if (is.na(value)) "" else value
    }
    
    # Replace everything inside a <choice> with a single piece of text.
    collapse_choice <- function(choice, text) {
        # The text is usually read from a child of `choice`; evaluate it
        # before those children are removed.
        force(text)
        xml_remove(xml_children(choice))
        xml_text(choice) <- text
        invisible(NULL)
    }
    
    # HTML marker for a folio label.
    folio_div <- function(folio) {
        sprintf("<div class='folio-number'>%s</div>", folio)
    }
    
    # Render one in-range <l> element as an HTML <div>.
    render_line <- function(line, line_count) {
        
        line_number <- if (line_count %% 5 == 0) {
            sprintf("<div class='line-number'>%d</div>", line_count)
        } else {
            ""
        }
        
        # Decorative initials: the <emph> text is replaced by the HTML for a
        # styled <span>, which then comes out as part of the line text.
        for (emph in xml_find_all(line, ".//emph")) {
            styled_initial <- sprintf('<span class="decorative-initial %s" data-size="%s">%s</span>', 
                                      attr_or_empty(emph, "rend"),
                                      attr_or_empty(emph, "n"),
                                      xml_text(emph))
            xml_text(emph) <- styled_initial
        }
        
        if (version_name == "minimal") {
            for (choice in xml_find_all(line, ".//choice")) {
                collapse_choice(choice, node_text(xml_find_first(choice, ".//orig")))
            }
            # any <reg> left outside a <choice> is dropped
            xml_remove(xml_find_all(line, ".//reg"))
            
        } else if (version_name == "intermediate") {
            for (choice in xml_find_all(line, ".//choice")) {
                has_abbr <- !is_missing(xml_find_first(choice, ".//abbr"))
                has_expan <- !is_missing(xml_find_first(choice, ".//expan"))
                if (has_abbr && has_expan) {
                    chosen <- xml_find_first(choice, ".//expan/intermediate")
                    if (is_missing(chosen)) {
                        chosen <- xml_find_first(choice, ".//expan/reg")
                    }
                } else {
                    chosen <- xml_find_first(choice, ".//orig")
                }
                collapse_choice(choice, node_text(chosen))
            }
            # any <reg> left outside a <choice> is dropped
            xml_remove(xml_find_all(line, ".//reg"))
            
        } else if (version_name == "extensive") {
            # drop every <orig> first, then collapse each <choice> to its <reg>
            xml_remove(xml_find_all(line, ".//orig"))
            for (choice in xml_find_all(line, ".//choice")) {
                collapse_choice(choice, node_text(xml_find_first(choice, ".//reg")))
            }
        }
        
        # <lb/> carries no text for this rendering
        xml_remove(xml_find_all(line, ".//lb"))
        
        # Turn the remaining placeholders back into real spaces.
        for (space_elem in xml_find_all(line, ".//SPACE")) {
            xml_text(space_elem) <- " "
            xml_name(space_elem) <- "span"
            xml_attr(space_elem, "class") <- "space-marker"
        }
        
        # Trim the ends and squeeze internal runs of whitespace to one space.
        line_text <- gsub("\\s+", " ", trimws(xml_text(line)))
        
        sprintf("<div style='line-height: 1.2;'>%s%s</div>", 
                line_number, line_text)
    }
    
    # --- walk the elements in document order --------------------------------
    
    chunks <- vector("list", length(nodes))  # HTML produced per element
    line_count <- 0      # running count of <l> elements seen so far
    in_range <- FALSE    # was the most recent <l> inside [min_line, max_line]?
    last_folio <- NULL   # "n" of the most recent <pb>, recorded even off-range
    
    for (i in seq_along(nodes)) {
        node <- nodes[[i]]
        kind <- xml_name(node)
        
        if (kind == "pb") {
            last_folio <- xml_attr(node, "n")
            if (in_range) {
                chunks[[i]] <- folio_div(last_folio)
            }
            
        } else if (kind == "l") {
            line_count <- line_count + 1
            was_in_range <- in_range
            in_range <- line_count >= lower && line_count <= upper
            
            if (in_range) {
                # On entering the range, show the folio the first kept line
                # sits on. This is decided here and only here, so a <pb> that
                # appears later inside the range is emitted exactly once.
                prelude <- if (!was_in_range && !is.null(last_folio)) {
                    folio_div(last_folio)
                }
                chunks[[i]] <- c(prelude, render_line(node, line_count))
            }
            
        } else if (in_range) {
            # <cb>: end the previous column and start a new one.
            # NOTE(review): nothing opens a column <div> before the first
            # in-range <cb>, so the first "</div>" here closes whichever <div>
            # is open at that point, and the last column is closed by the
            # final "</div>" below. Confirm the stylesheet relies on this
            # before changing it.
            chunks[[i]] <- c("</div>", "<div class='column-break'>")
        }
    }
    
    # Wrap everything in the edition-text container and print it.
    formatted_text <- paste(c("<div class='edition-text'>", unlist(chunks), "</div>"), collapse = "\n")
    cat(formatted_text)
}
# Print the "minimal" version of the whole text (no line range given)
convert_xml_to_html(xml_file = path_xml, version_name = "minimal")
# Print the "intermediate" version of the whole text
convert_xml_to_html(xml_file = path_xml, version_name = "intermediate")
# Print the "extensive" version of the whole text
convert_xml_to_html(xml_file = path_xml, version_name = "extensive")

Minimal

???r
Mas Sete Eglese a grat besu
que per le faces oresun
e Dis li otreit pas durable
e la desfende Deable
5
Marie lia respundu
Dex la goert per savtu
el agarde vrs orient
Ses mins drecet au ceo leste
e pree de loCriator
10
Mot piemt per sa docor
Si que la boche limovet
Mas nulle voiz ne se eset
de tre fusepre ravie
Si que elle ni tochot mie
15
Sosmas ot paor mot grant
Jesu crist en trait a garant
Lai o elle fist sa preere
[]tre sires laveit si chere
[]ue plus q dos peiz e demie
20
[]veit entre la tre e le
???v
de la paor qot Zosimas
se comca aleir su pas
cuda fantome aveir troe
areres sesteit reuse
25
qant Marie lenvit torneir
Comenca lo a apeleir
Chelles dist elle Zosimas
beau pereschers per q dotas
Per que dotas per une fene
30
Saches que je se crestienne
baee fui en menface
en Deai tote mafiace
Ici see per espenier
Nen partirai jusqe aumorir
35
Laseinte croiz fist en su frunt
Sadestre mei leva amunt
Li sens hermites co bei vit
aus peiz aladome cheit

Intermediate

???r
Mas Seinte Eglese a grant besong
que per le faces oresun
e Dis li otreit pas durable
e la desfende Deable
5
Marie lia respundu
Dex la goert per savertu
el agarde vers orient
Ses mains drecet au ceo lestent
e pree de loCriator
10
Mot piement per sa docor
Si que la boche limovet
Mas nulle voiz ne se eset
de tere fusempres ravie
Si que elle ni tochot mie
15
Sosmas ot paor mot grant
Jesu crist en trait a garant
Lai o elle fist sa preere
[]tre sires laveit si chere
[]ue plus que dos peiz e demie
20
[]veit entre la tere e le
???v
de la paor qot Zosimas
sen commenca aleir sun pas
cuda fantome aveir trove
areres sesteit reuse
25
qant Marie lenvit torneir
Comenca lo a apeleir
Chelles dist elle Zosimas
beau pereschers per quoi dotas
Per que dotas per une femne
30
Saches que je se crestienne
baee fui en menfance
en Deai tote mafiance
Ici see per espenier
Nen partirai jusque aumorir
35
Laseinte croiz fist en sun frunt
Sadestre mein leva amunt
Li sens hermites co bein vit
aus peiz aladome cheit

Extensive

???r
Mais Seinte Eglese a grant besong
que per le facés oresun
e Dix li otreit pas durable
e la desfende Deable
5
Marie li a respundu
Dex la goert per sa vertu
el agarde vers orient
Ses mains drecet au ciel les tent
e pree de lo Criator
10
Molt piement per sa doçor
Si que la boche li movet
Mas nulle voiz ne se eset
de tere fu sempres ravie
Si qu’elle n’i tochot mie
15
Zosimas ot paor molt grant
Jesucrist en trait a garant
Lai o elle fist sa preere
[Nos]tre sires l’aveit si chere
[Q]ue plus que dos piez e demie
20
[A]veit entre la tere e le
???v
de la paor q’ot Zosimas
s’en commenca aleir sun pas
cuida fantome aveir trové
areres s’esteit reusé
25
qant Marie l’en vit torneir
Comença lo a apeleir
Chelles dist elle Zosimas
beau peres chers per quoi dotas
Per quoi dotas per une femne
30
Saches que je seo crestienne
batisie fui en m’enfance
en Dé ai tote ma fiance
Ici seo per espenier
N’en partirai jusque au morir
35
La seinte croiz fist en sun frunt
Sa destre mein leva amunt
Li sens hermites ço bein vit
aus peiz a la dome cheit