-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
216 lines (178 loc) · 8.34 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import streamlit as st
import numpy as np
import pandas as pd
import rongzi
import graphviz
from fpdf import FPDF
import base64
from tempfile import NamedTemporaryFile
rz = rongzi.RongZi()
def phrase():
st.title("Phrase Path Visualization")
st.markdown('Like "six degrees of separation," for Chinese Characters.')
st.markdown('---')
def load_chengyus() -> pd.DataFrame:
"""Get dataframe of chengyu data"""
chengyus = pd.read_csv('./assets/chengyu.tsv', sep='\t').set_index('Proverb')
cols = ['Literal Meaning', 'Figurative Meaning', 'Pinyin']
chengyus = chengyus[cols]
return chengyus
def choose_input_method() -> tuple[str, list[str]]:
"""User decides how to get the chinese character components: random or user-input"""
input_methods = ['Random proverb', 'Type a Chinese phrase in yourself']
input_methods_prompt = "Choose a Chinese phrase by one of two methods"
input_method = st.radio(input_methods_prompt, input_methods)
st.markdown(f'---\n')
return input_method, input_methods
def get_phrase() -> str:
"""
based on the selected input method:
- populate subsequent sections of the page
- return the phrase as string p
"""
def refresh_page():
"refresh page, to get a new random chengyu"
st.experimental_singleton.clear()
return None
# get random chengyu
if input_method == input_methods[0]:
p = np.random.choice(chengyus.index)
label = 'Pick a new proverb.'
st.button(label, refresh_page())
# get user-input phrase
if input_method == input_methods[1]:
label = 'Enter a name or proverb in Chinese characters,\nmaximum 7 characters:'
p = st.text_input(label, max_chars=7)
return p
def show_path_graph(phrase, pp:pd.DataFrame):
if pp.empty:
return None
dot = rz.get_paths_graph(pp)
st.graphviz_chart(dot)
# export_graphviz_pdf(phrase, dot)
def show_alternative_paths(pp:pd.DataFrame):
"Display alternative paths as dataframe results"
if pp.empty:
return None
st.markdown('---')
st.markdown("#### Alternative paths between each character in the phrase.")
st.markdown(pp.to_markdown(index=False))
return None
def show_chengyu_info():
# show translation of chengyu (if it's a chengyu)
if input_method == input_methods[0]:
chengyu = chengyus.loc[p]
chengyu = "**"+chengyu.index+"**: " + chengyu
chengyu = ' \n'.join(chengyu)
st.markdown(chengyu)
chengyus = load_chengyus()
input_method, input_methods = choose_input_method()
p = get_phrase()
phrase_paths = rz.analyze_sequence(p) # Optimize component path
st.markdown(f"## {p}") # show phrase
show_path_graph(p, phrase_paths)
show_chengyu_info() # only if user selected "random proverb"
# TODO: PUT THIS IN A "DETAILS" DROP-DOWN
show_alternative_paths(phrase_paths)
def family_tree():
st.title("Single-Character Family-Tree Graph Visualization")
def choose_input_method() -> tuple[str, list[str]]:
"""User decides how to get a single chinese character: random or user-input"""
input_methods = ['Random character', 'Type a Chinese character in yourself']
input_methods_prompt = "Choose a single Chinese character (or component) by one of two methods"
input_method = st.radio(input_methods_prompt, input_methods)
st.markdown(f'---\n')
return input_method, input_methods
def get_component() -> str:
"""
based on the selected input method:
- populate subsequent sections of the page
- return the component as string c
"""
def get_random_component():
def refresh_page():
"refresh page, to get a new random character"
st.experimental_singleton.clear()
return None
def remove_silently(l:list, c:str) -> list:
try:
return l.remove(c)
except ValueError:
return l
c = ''
# get random component with num_kids > 0 and num_parents >0
while input_method == input_methods[0]:
c = np.random.choice(rz.ccd.index)
# only take a component with both kids and parents
len_kids = len(rz.get_kids(c))
len_parents = len(remove_silently(rz.get_parents(c), c))
if any([len_kids==0, len_parents==0]) is True:
continue
label = 'Pick a new character.'
st.button(label, refresh_page())
break
return c
c = get_random_component()
if input_method == input_methods[1]:
label = 'Enter a single chinese character or component'
c = st.text_input(label, max_chars=1)
return c
input_method, input_methods = choose_input_method()
c = get_component()
max_sibs = 50
excess_kids = False
if (c != ''):
st.markdown(f"## {c}")
tree_rz = rongzi.RongZi(c)
excess_kids = tree_rz.get_vertical_family_tree(max_sibs)
# export_graphviz_pdf(f"family_tree_{c}", tree_rz.vert_tree)
st.graphviz_chart(tree_rz.vert_tree)
if excess_kids:
st.markdown(f'*Note: Some components in this tree are so common that they are present in more than {max_sibs} larger characters. '\
f'So, the children of the following list of characters: {excess_kids} were truncated to a maximum of {max_sibs} in this graph.*')
def export_graphviz_pdf(filename:str, dot):
# REMOVED THIS FUNCTION BECAUSE IT WASN'T WORKING YET ON STREAMLIT CLOUD
# (IT WAS WORKING ON STREAMLIT HOSTED ON MY AWS EC2 INSTANCE,
# BUT LOCALLY-HOSTED STREAMLIT ONLY NATIVELY OFFERS HTTP, NOT HTTPS.)
# THE ISSUE MAY HAVE HAD SOMETHING TO DO WITH GRAPHVIZ ITSELF NEEDING TO BE
# INSTALLED ON THE SERVER ITSELF, NOT JUST THE PYTHON GRAPHVIZ LIBRARY,
# FOR THE PDF EXPORT TO WORK RIGHT.
# def _create_download_link(val, filename):
# b64 = base64.b64encode(val) # val looks like b'...'
# return_str = f'''<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}.pdf">Download pdf</a>
# <i>(Pdf format may be easier to resize and view.)</i>'''
# return return_str
# pdf = FPDF()
# pdf.add_page()
# with NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
# bytes_file = graphviz.pipe('dot', 'pdf', bytes(dot.source, 'utf-8'))
# html = _create_download_link(bytes_file, filename)
# st.markdown(html, unsafe_allow_html=True)
pass
linkback_markdown = """
#### links to:
- My portfolio: [portfolio.bhrdwj.net](https://portfolio.bhrdwj.net)
- This app's github: [github.com/bhrdj/rongzi](https://github.com/bhrdj/rongzi)
---
# rongzi app demo
"""
todo_markdown = """
---
TODO list:
- App currently mixes Traditional character components as found in Taiwan (ROC) and Simplified components as in Mainland China (PRC). Alternative options for managing this aspect should be available.
- Many components have very low use-frequency, and are nonetheless treated equivalently as more common characters. Prioritizing high-frequency characters and components when optimizing phrase-paths should be implemented.
- For the Chinese-language-learner use-case, adding in example sentences using the proverbs would be helpful.
- Remove redundant circles and boxes around phrase-paths graph visualization.
"""
def main():
st.sidebar.markdown(linkback_markdown)
page_titles = ["Proverb Exploration","Family-Tree Graph Visualization"]
page = st.sidebar.radio('', page_titles, index=0)
st.sidebar.markdown('')
st.sidebar.markdown(todo_markdown)
if page == page_titles[0]:
phrase()
elif page == page_titles[1]:
family_tree()
if __name__ == "__main__":
main()