Skip to content

Commit

Permalink
Checkpoint for #3725, #3634 & #2151
Browse files Browse the repository at this point in the history
  • Loading branch information
benwbrum committed Aug 28, 2023
1 parent f9408ae commit 96b5842
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 12 deletions.
78 changes: 66 additions & 12 deletions app/helpers/export_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -417,20 +417,24 @@ def xml_to_export_tei(xml_text, context, page_id = "", add_corrsp=false)
# xml_text = titles_to_divs(xml_text, context)
doc = REXML::Document.new(xml_text)
#paras_string = ""

binding.pry
my_display_html = ""
doc.elements.each_with_index("//p") do |e,i|
transform_links(e)
transform_expansions(e)
transform_regularizations(e)
transform_marginalia_and_catchwords(e)
transform_footnotes(e)
transform_lb(e)
e.add_attribute("xml:id", "#{page_id_to_xml_id(page_id, context.translation_mode)}P#{i}")
if add_corrsp
e.add_attribute("corresp", "#{page_id_to_xml_id(page_id, !context.translation_mode)}P#{i}")
tags = ['table', 'p']
tags.each do |tag|
doc.elements.each_with_index("//#{tag}") do |e,i|
transform_links(e)
transform_expansions(e)
transform_regularizations(e)
transform_marginalia_and_catchwords(e)
transform_footnotes(e)
transform_lb(e)
transform_tables(e)
e.add_attribute("xml:id", "#{page_id_to_xml_id(page_id, context.translation_mode)}P#{i}")
if add_corrsp
e.add_attribute("corresp", "#{page_id_to_xml_id(page_id, !context.translation_mode)}P#{i}")
end
my_display_html << e.to_s
end
my_display_html << e.to_s
end

return my_display_html.gsub('<lb/>', "<lb/>\n").gsub('</p>', "\n</p>\n\n").gsub('<p>', "<p>\n").encode('utf-8')
Expand Down Expand Up @@ -501,6 +505,56 @@ def transform_marginalia_and_catchwords(p_element)
end
end

# Convert HTML tables into TEI tables, in place.
#
# The XPath "//table" is absolute, so every <table> in the document that
# contains +p_element+ is considered, not only tables nested inside
# +p_element+. The method is therefore safe to call repeatedly on elements
# of the same document: tables that were already converted are skipped.
#
# Each HTML table is replaced by a TEI <table> element where
# * all <th> cells found under <thead> become one <row role="label">,
# * every <tr> under <tbody> becomes a <row>,
# * every <th>/<td> becomes a <cell role="data"> that takes over the
#   original cell's child nodes,
# * "rows" holds the number of body rows (the label row is not counted),
# * "cols" holds the largest number of cells found in any row.
#
# Afterwards <lb/> elements nested inside tables are removed; <lb/>
# elements elsewhere in the document are left untouched.
#
# @param p_element [REXML::Element] any element of the document to process
def transform_tables(p_element)
  p_element.elements.each("//table") do |html_table|
    # The TEI replacement is also named "table", so a later call would match
    # it again and, finding no thead/tbody, replace it with an empty table.
    next if html_table.elements["row"] || html_table.attributes["rows"]

    tei_table = REXML::Element.new("table")
    widths = []

    # does the table have a header?
    if html_table.elements["thead"]
      header = tei_row_from_cells(html_table, "thead/tr/th")
      header.add_attribute("role", "label")
      widths << header.elements.size
      tei_table.add(header)
    end

    # now convert the body of the table
    row_count = 0
    html_table.elements.each("tbody/tr") do |tr|
      row = tei_row_from_cells(tr, "td")
      widths << row.elements.size
      row_count += 1
      tei_table.add(row)
    end

    # Attribute values are passed as strings so that serialisation does not
    # depend on how REXML coerces non-string values.
    tei_table.add_attribute("rows", row_count.to_s)
    tei_table.add_attribute("cols", (widths.max || 0).to_s)
    html_table.replace_with(tei_table)
  end

  # Delete line breaks inside tables only. The relative path ".//lb" keeps
  # the search within each table; "//lb" would match the whole document.
  p_element.elements.each("//table") do |table|
    table.elements.each(".//lb") { |lb| lb.remove }
  end
end

# Build a TEI <row> holding one <cell role="data"> per element matched by
# +cell_xpath+ relative to +source+. The child nodes of each matched element
# are moved (not copied) into the new cell.
#
# @param source [REXML::Element] element the XPath is evaluated against
# @param cell_xpath [String] relative XPath selecting the HTML cells
# @return [REXML::Element] the new, unattached <row> element
def tei_row_from_cells(source, cell_xpath)
  row = REXML::Element.new("row")
  source.elements.each(cell_xpath) do |html_cell|
    cell = REXML::Element.new("cell")
    cell.add_attribute("role", "data")
    # +children+ returns a copy of the node list, so moving nodes while
    # iterating is safe.
    html_cell.children.each { |child| cell.add(child) }
    row.add(cell)
  end
  row
end

def transform_footnotes(p_element)
p_element.elements.each('//footnote') do |e|
marker = e.attributes['marker']
Expand Down
37 changes: 37 additions & 0 deletions app/helpers/tei_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,43 @@ def xml_to_tei(xml_text, preserve_lb=true)
e.replace_with(lb)
end
end
binding.pry
# convert HTML tables to TEI tables
doc.elements.each("//table") do |e|
table = REXML::Element.new("table")
# does the table have a header?
if e.elements["thead"]
# convert the header into a row element with role="label"
header = REXML::Element.new("row")
header.add_attribute("role", "label")
e.elements.each("thead/tr/th") do |th|
# convert the th into a cell element
cell = REXML::Element.new("cell")
cell.add_attribute("role", "data")
cell.add_child(th.children)
end
table.add(header)
end
# now convert the body of the table
e.elements.each("tbody/tr") do |tr|
row = REXML::Element.new("row")
tr.elements.each("td") do |td|
cell = REXML::Element.new("cell")
cell.add_attribute("role", "data")
cell.add_child(td.children)
row.add(cell)
end
table.add(row)
end # end of tbody
e.replace_with(table)
end # end of table
# now delete any lb elements from tables elements in the document
doc.elements.each("//table") do |table|
table.elements.each("//lb") do |lb|
lb.remove
end
end

unless user_signed_in?
doc.elements.each("//sensitive") do |e|
e.replace_with(REXML::Comment.new("sensitive information suppressed"))
Expand Down

0 comments on commit 96b5842

Please sign in to comment.