CAT-SOOP is a flexible, programmable learning management system based on the Python programming language. https://catsoop.mit.edu
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

888 lines
28 KiB

  1. # This file is part of CAT-SOOP
  2. # Copyright (c) 2011-2020 by The CAT-SOOP Developers <catsoop-dev@mit.edu>
  3. #
  4. # This program is free software: you can redistribute it and/or modify it under
  5. # the terms of the GNU Affero General Public License as published by the Free
  6. # Software Foundation, either version 3 of the License, or (at your option) any
  7. # later version.
  8. #
  9. # This program is distributed in the hope that it will be useful, but WITHOUT
  10. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  12. # details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. """
  17. Handling of the CAT-SOOP specification language(s): Markdown, XML, and Python
  18. The real goal of parsing of a page's source is to convert it back to the
  19. original Python specification format. Markdown is translated to XML, which is
  20. translated to Python. The overall flow when parsing a page is:
  21. 1. If the content file is in Markdown, parse it down to HTML.
  22. 2. If the content file was in Markdown or XML, parse it down to Python
  23. (stripping out comments and separating &lt;question&gt; tags into
  24. appropriate calls to `catsoop.tutor.question`).
  25. """
  26. import os
  27. import re
  28. import ast
  29. import sys
  30. import copy
  31. import random
  32. import string
  33. import hashlib
  34. import traceback
  35. from io import StringIO
  36. from collections import OrderedDict
  37. from . import tutor
  38. from . import dispatch
  39. from . import markdown_math
  40. from .errors import html_format, clear_info
  41. import markdown
  42. from markdown.extensions import tables
  43. from markdown.extensions import fenced_code
  44. from markdown.extensions import sane_lists
  45. from bs4 import BeautifulSoup
  46. from unidecode import unidecode
  47. _nodoc = {
  48. "BeautifulSoup",
  49. "OrderedDict",
  50. "StringIO",
  51. "clear_info",
  52. "html_format",
  53. "PYTHON_REGEX",
  54. "PYVAR_REGEX",
  55. "remove_common_leading_whitespace",
  56. "source_formats",
  57. "source_format_string",
  58. }
  59. _malformed_question = "<font color='red'>malformed <tt>question</tt></font>"
  60. _valid_qname = re.compile(r"^[A-Za-z][_A-Za-z0-9]*$")
  61. _unsafe_title = re.compile(r"[^A-Za-z0-9_]")
  62. def _safe_title(t, disallowed=None):
  63. disallowed = disallowed if disallowed is not None else set()
  64. title = otitle = "_%s" % (
  65. re.sub(r"_+", "_", _unsafe_title.sub("_", unidecode(t))).lower().strip("_")
  66. )
  67. count = 2
  68. while title in disallowed:
  69. title = "%s_%d" % (otitle, count)
  70. count += 1
  71. disallowed.add(title)
  72. return title
def xml_pre_handle(context):
    """
    Translate the value in `cs_content` from XML to Python, storing the result
    as `cs_problem_spec` in the given context.

    This function mostly strips out comments and converts &lt;question&gt; tags
    into appropriate calls to `catsoop.tutor.question`.

    The stored `cs_problem_spec` is a list that alternates between plain text
    chunks and whatever was produced for each question tag (the value returned
    by `catsoop.tutor.question`, or an HTML error message).

    **Parameters:**

    * `context`: the context associated with this request (from which
        `cs_content` is taken)

    **Returns:** `None`
    """
    text = context["cs_content"]
    # drop everything inside <comment>...</comment>
    text = re.sub(_environment_matcher("comment"), "", text)

    # tmp[0] is the text before the first question tag; every later piece
    # starts right after a "<question"
    tmp = text.split("<question")
    qcount = 0  # counter used to generate names for unnamed questions
    o = [tmp[0]]
    for piece in tmp[1:]:
        # everything up to the first ">" is the question type
        chunks = piece.strip().split(">", 1)
        if len(chunks) != 2:
            # no closing ">" for the opening tag; give up on the rest
            o.append(_malformed_question)
            break
        type_, rest = chunks
        otherrest = rest.split("</question>", 1)
        if len(otherrest) != 2:
            # no closing </question>; give up on the rest
            o.append(_malformed_question)
            break
        # `code` is the tag body; `rest` is the text that follows the tag
        code, rest = otherrest
        # the tag body runs in a shallow copy of the context, so names it
        # binds do not leak into the context itself
        e = dict(context)
        try:
            code = remove_common_leading_whitespace(code)
            if isinstance(code, int):
                # an int signals inconsistent indentation; its value is the
                # line index reported by remove_common_leading_whitespace
                o.append(
                    (
                        "<div><font color='red'><b>A Python Error Occurred:</b></font>"
                        "<p><pre>"
                        "Inconsistent indentation on line %d of question tag"
                        "</pre></p></div>"
                    )
                    % code
                )
                o.append(rest)
                continue
            # NOTE(review): this executes the tag body as Python; the page
            # source is presumably trusted (author-written) -- confirm.
            exec(code, e)
            if "csq_name" not in e:
                e["csq_name"] = "q%06d" % qcount
                qcount += 1
            if _valid_qname.match(e["csq_name"]):
                # a "dummy" question is executed but produces no output
                if type_ != "dummy":
                    o.append(tutor.question(context, type_, **e))
            else:
                o.append(
                    (
                        '<div class="question">'
                        '<font color="red">'
                        "ERROR: Invalid question name <code>%r</code>"
                        "</font></div>"
                    )
                    % e["csq_name"]
                )
        except:
            # any failure while running the tag body is reported inline in
            # the page rather than propagated; note that `e` and `text` are
            # rebound here (neither is needed again in its earlier role)
            e = sys.exc_info()
            tb_entries = traceback.extract_tb(e[2])
            fname, lineno, func, text = tb_entries[-1]
            exc_only = traceback.format_exception_only(e[0], e[1])
            if e[0] == SyntaxError:
                tb_text = "Syntax error in question tag:\n"
            elif func == "<module>":
                # the innermost frame is the tag body itself
                tb_text = "Error on line %d of question tag." % lineno
                try:
                    tb_text += "\n %s\n\n" % code.splitlines()[lineno - 1].strip()
                except:
                    # best effort only: skip the source line if unavailable
                    pass
            else:
                # the error was raised somewhere below the tag body
                tb_text = context["csm_errors"].error_message_content(
                    context, html=False
                )
                exc_only = [""]
            tb_text = "".join([tb_text] + exc_only)

            err = html_format(clear_info(context, tb_text))
            ret = (
                "<div><font color='red'>"
                "<b>A Python Error Occurred:</b>"
                "<p><pre>%s</pre><p>"
                "</font></div>"
            ) % err
            o.append(ret)
        o.append(rest)

    context["cs_problem_spec"] = o
  161. def _md(x):
  162. o = markdown.markdown(
  163. x,
  164. extensions=[
  165. tables.TableExtension(),
  166. fenced_code.FencedCodeExtension(),
  167. sane_lists.SaneListExtension(),
  168. markdown_math.MathExtension(),
  169. ],
  170. )
  171. return o
  172. def md_pre_handle(context, xml=True):
  173. """
  174. Translate the value in `cs_content` from Markdown to HTML
  175. **Parameters:**
  176. * `context`: the context associated with this request (from which
  177. `cs_content` is taken)
  178. **Optional Parameters:**
  179. * `xml` (default `True`): whether `catsoop.language.xml_pre_handle` should
  180. be invoked after translating to HTML
  181. **Returns:** `None`
  182. """
  183. text = context["cs_content"]
  184. text = re.sub(_environment_matcher("comment"), "", text)
  185. text = _md_format_string(context, text, False)
  186. context["cs_content"] = text
  187. if xml:
  188. xml_pre_handle(context)
  189. def py_pre_handle(context):
  190. """
  191. 'Pre-handler' for Python.
  192. This function exists to mirror the interface of `md_pre_handle` and
  193. `xml_pre_handle`, but it does nothing (since the `cs_problem_spec` does not
  194. need any additional processing at this point).
  195. **Parameters:**
  196. * `context`: the context associated with this request (from which
  197. `cs_content` is taken)
  198. **Returns:** `None`
  199. """
  200. pass
  201. def _md_format_string(context, s, xml=True):
  202. # generate a unique string to split around
  203. splitter = None
  204. while splitter is None or splitter in s:
  205. splitter = "".join(random.choice(string.ascii_letters) for i in range(20))
  206. # extract tags, replace with splitter
  207. tag_contents = []
  208. def _replacer(m):
  209. tag_contents.append(m.groups())
  210. return splitter
  211. tags_to_replace = context.get("cs_markdown_ignore_tags", tuple())
  212. tags = ("pre", "question", "(?:display)?math", "script", "showhide") + tuple(
  213. tags_to_replace
  214. )
  215. checker = re.compile(
  216. r"<(%s)(.*?)>(.*?)</\1>" % "|".join(tags), re.MULTILINE | re.DOTALL
  217. )
  218. text = re.sub(checker, _replacer, s)
  219. text = _md(text)
  220. num_tags = len(tag_contents)
  221. pieces = text.split(splitter)
  222. o = ""
  223. for ix, piece in enumerate(pieces):
  224. o += piece
  225. if ix < num_tags:
  226. t, r, b = tag_contents[ix]
  227. o += "<%s%s>%s</%s>" % (t, r, b, t)
  228. text = o
  229. if text.startswith("<p>") and text.endswith("</p>"):
  230. text = text[3:-4]
  231. return _xml_format_string(context, text) if xml else text
  232. def _xml_format_string(context, s):
  233. return handle_custom_tags(context, s)
  234. source_formats = OrderedDict(
  235. [
  236. ("catsoop", md_pre_handle),
  237. ("md", md_pre_handle),
  238. ("xml", xml_pre_handle),
  239. ("py", py_pre_handle),
  240. ]
  241. )
  242. """OrderedDict mapping source format names to formatting handlers"""
  243. source_format_string = OrderedDict(
  244. [
  245. ("catsoop", _md_format_string),
  246. ("md", _md_format_string),
  247. ("xml", _xml_format_string),
  248. ("py", _xml_format_string),
  249. ]
  250. )
  251. """OrderedDict mapping source format names to formatters"""
  252. def source_transform_string(context, s):
  253. """
  254. Convert the given string to HTML, based on the syntax associated with the
  255. type of the current content file.
  256. If the content file is Markdown, this will translate the string into HTML
  257. and handle custom tags. If the content file is in HTML or Python, custom
  258. tags will be handled, but no other translation will occur.
  259. **Parameters:**
  260. * `context`: the context associated with this request
  261. * `s`: the string to be translated to HTML
  262. **Returns:** the translated string
  263. """
  264. src_format = context.get("cs_source_format", None)
  265. if src_format is not None:
  266. return source_format_string[src_format](context, s)
  267. else:
  268. return s
  269. # Handling of custom XML tags
  270. def _environment_matcher(tag):
  271. return re.compile(
  272. """<%s>(?P<body>.*?)</%s>""" % (tag, tag),
  273. re.MULTILINE | re.DOTALL | re.IGNORECASE,
  274. )
  275. _matcher = r"[\#0\- +]*\d*(?:.\d+)?[hlL]?[diouxXeEfFgGcrs]"
  276. _matcher = r"(?:%%%s|%s)?" % (_matcher, _matcher)
  277. _pyvar_matcher = r"(?P<lead>^|[^\\])@(?P<fmt>%s){(?P<body>.+?)}" % _matcher
  278. PYVAR_REGEX = re.compile(_pyvar_matcher, re.DOTALL | re.IGNORECASE)
  279. """Regular expression for matching `@{}` syntax"""
  280. PYTHON_REGEX = re.compile(
  281. r"""<(?P<tag>python|printf) *(?P<opts>.*?)>(?P<body>.*?)</(?P=tag)>""",
  282. re.MULTILINE | re.DOTALL | re.IGNORECASE,
  283. )
  284. """Regular expression for matching &lt;python&gt; tags"""
  285. def remove_common_leading_whitespace(x):
  286. lines = x.splitlines()
  287. if len(lines) == 0:
  288. return ""
  289. for ix in range(len(lines)):
  290. if lines[ix].strip():
  291. break
  292. first_ix = ix
  293. candidate = re.match(_indent_regex, lines[first_ix])
  294. if candidate is None:
  295. return x
  296. candidate = candidate.group(1)
  297. for ix, i in enumerate(lines):
  298. if ix < first_ix or not i.strip():
  299. continue
  300. if not i.startswith(candidate):
  301. return ix
  302. lc = len(candidate)
  303. return "\n".join(i[lc:] for i in lines)
  304. def _tab_replacer(x):
  305. return x.group(1).replace("\t", " ")
  306. _indent_regex = re.compile(r"^(\s*)")
  307. def _replace_indentation_tabs(x):
  308. return re.sub(_indent_regex, _tab_replacer, x)
# Matches one complete Python string literal and captures it as group 1.
# The alternatives are tried in this order:
#   1. triple-double-quoted strings
#   2. triple-single-quoted strings
#   3. single-quoted strings (may not contain a raw newline)
#   4. double-quoted strings (may not contain a raw newline)
# In every alternative a backslash followed by any character is consumed as a
# unit, so an escaped quote does not terminate the literal.
_string_regex = re.compile(
    r"""(\"\"\"[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*\"\"\"|'''[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''|'[^\n'\\]*(?:\\.[^\n'\\]*)*'|"[^\n"\\]*(?:\\.[^\n"\\]*)*")""",
    re.MULTILINE | re.DOTALL,
)
  313. def indent_code(c):
  314. strings = {}
  315. # start by removing strings and replacing them with unique character sequences
  316. def _replacer(x):
  317. new_id = None
  318. while new_id is None or new_id in strings or new_id in c:
  319. new_id = "".join(random.choice(string.ascii_letters) for i in range(20))
  320. strings[new_id] = x.group(1)
  321. return new_id
  322. c = re.sub(_string_regex, _replacer, c)
  323. # now that strings are out of the way, change the indentation of every line
  324. c = "\n".join(" %s" % _replace_indentation_tabs(i) for i in c.splitlines())
  325. c = " pass\n%s" % c
  326. # finally, reintroduce strings
  327. for k, v in strings.items():
  328. c = c.replace(k, v)
  329. return c
  330. def get_python_output(context, code, variables, line_offset=0):
  331. """
  332. Helper function. Evaluate code in the given environment, and return its
  333. output, if any.
  334. Makes use of a special variable `cs___WEBOUT`, which is a file-like
  335. object. Any data written to `cs___WEBOUT` will be returned. Overwrites
  336. `print` in the given environment so that it outputs to `cs___WEBOUT`
  337. instead of to stdout.
  338. **Parameters:**
  339. * `context`: the context associated with this request
  340. * `code`: a strin containing the Python code to be executed
  341. * `variables`: a dictionary representing the environment in which the code
  342. should be executed
  343. **Optional Parameters**:
  344. * `line_offset` (default `0`): the offset, in lines, of this code's
  345. &lt;python&gt; tag from the top of the source file; used in case an error
  346. occurs, to try to point authors to the right location in the original
  347. source file
  348. **Returns:** a string containing any values written to `cs___WEBOUT`
  349. """
  350. variables.update({"cs___WEBOUT": StringIO()})
  351. try:
  352. code = remove_common_leading_whitespace(code)
  353. if isinstance(code, int):
  354. return (
  355. "<div><font color='red'><b>A Python Error Occurred:</b></font>"
  356. "<p><pre>"
  357. "Inconsistent indentation on line %d of python tag (line %d of source)"
  358. "</pre></p></div>"
  359. ) % (code, code + line_offset + 1)
  360. code = indent_code(code)
  361. code = (
  362. (
  363. "_cs_oprint = print\n"
  364. "def myprint(*args, **kwargs):\n"
  365. ' if "file" not in kwargs:\n'
  366. ' kwargs["file"] = cs___WEBOUT\n'
  367. " _cs_oprint(*args, **kwargs)\n"
  368. "print = myprint\n"
  369. "try:\n\n"
  370. )
  371. + code
  372. + (
  373. "\nexcept Exception as e:\n"
  374. " raise e\n"
  375. "finally:\n"
  376. " print = _cs_oprint"
  377. )
  378. )
  379. code = code.replace("tutor.init_random()", "tutor.init_random(globals())")
  380. code = code.replace("tutor.question(", "tutor.question(globals(),")
  381. exec(code, variables)
  382. return variables["cs___WEBOUT"].getvalue()
  383. except:
  384. e = sys.exc_info()
  385. tb_entries = traceback.extract_tb(e[2])
  386. fname, lineno, func, text = tb_entries[-1]
  387. exc_only = traceback.format_exception_only(e[0], e[1])
  388. if e[0] == SyntaxError:
  389. tb_text = "Syntax error in Python tag:\n"
  390. def lineno_replacer(x):
  391. return "line %d" % (ast.literal_eval(x.group(1)) - 9)
  392. exc_only = [re.sub(r"line (\d)+", lineno_replacer, i) for i in exc_only]
  393. elif func == "<module>":
  394. tb_text = (
  395. "Error on line %d of Python tag (line %d of source):\n %s\n\n"
  396. % (
  397. lineno - 9,
  398. lineno + line_offset - 8,
  399. code.splitlines()[lineno - 1].strip(),
  400. )
  401. )
  402. else:
  403. tb_text = context["csm_errors"].error_message_content(context, html=False)
  404. exc_only = [""]
  405. tb_text = "".join([tb_text] + exc_only)
  406. err = html_format(clear_info(context, tb_text))
  407. ret = (
  408. "<div><font color='red'>"
  409. "<b>A Python Error Occurred:</b>"
  410. "<p><pre>%s</pre><p>"
  411. "</font></div>"
  412. ) % (err,)
  413. return ret
  414. def _make_python_handler(context, fulltext):
  415. if "cs__python_envs" not in context:
  416. context["cs__python_envs"] = {}
  417. def python_tag_handler(match):
  418. execcontext = context
  419. guess_line = fulltext[: match.start()].count("\n")
  420. # guess_line = 0
  421. d = match.groupdict()
  422. opts = (d["opts"] or "").strip().split(" ")
  423. body = d["body"]
  424. if d["tag"] == "printf":
  425. if len(opts) == 1 and opts[0] == "":
  426. f = "%s"
  427. else:
  428. f = opts[0]
  429. body = "print(%r %% (%s,))" % (f, body)
  430. opts = []
  431. out = ""
  432. # decide whether to show the code
  433. if "show" in opts:
  434. opts.remove("show")
  435. code = '<pre><code class="lang-python">%s</code></pre>'
  436. out += code % html_format(body)
  437. # decide whether to run the code
  438. if "norun" in opts:
  439. return (out).strip()
  440. # decide in which environment the code should be run
  441. for i in opts:
  442. if i.startswith("env="):
  443. envname = "=".join(i.split("=")[1:])
  444. if envname not in context["cs__python_envs"]:
  445. context["cs__python_envs"][envname] = {}
  446. execcontext = context["cs__python_envs"][envname]
  447. # run the code
  448. code_result = get_python_output(context, body, execcontext, guess_line)
  449. # decide whether to show the result
  450. return (out + code_result).strip() if "noresult" not in opts else (out).strip()
  451. return python_tag_handler
  452. def handle_includes(context, text):
  453. """
  454. Handles all `<include>` tags in the provided text, replacing them with the
  455. contents of the files they reference.
  456. **Parameters:**
  457. * `context`: the context associated with this request
  458. * `text`: a string containing the raw HTML source of the page
  459. **Returns:** a string representing the updated HTML after includes have
  460. been handled
  461. """
  462. # we'll handle paths relative to here unless given an absolute path
  463. def _include_handler(match):
  464. base_dir = dispatch.content_file_location(context, context["cs_path_info"])
  465. base_dir = os.path.realpath(os.path.dirname(base_dir))
  466. b = match.groupdict()["body"]
  467. replacements = []
  468. for fname in b.splitlines():
  469. fname = fname.strip()
  470. if not fname:
  471. continue # skip blank lines
  472. fname = os.path.join(base_dir, fname)
  473. fname = os.path.realpath(fname)
  474. if os.path.commonprefix([fname, base_dir]) != base_dir:
  475. # tried to escape the course
  476. continue
  477. if not os.path.isfile(fname):
  478. continue
  479. with open(fname) as f:
  480. replacements.append(f.read())
  481. return "\n\n".join(replacements)
  482. return re.sub(_environment_matcher("include"), _include_handler, text)
  483. def handle_python_tags(context, text):
  484. """
  485. Process all Python-related custom tags.
  486. Firstly, each `@{}` is translated into an appropriate `<printf>` tag.
  487. Then, `<python>` and `<printf>` tags are handled sequentially, each being
  488. replaced with its output after having its code evaluated in the current
  489. context (using `catsoop.language.get_python_output`).
  490. **Parameters:**
  491. * `context`: the context associated with this request
  492. * `text`: a string containing the raw HTML source of the page
  493. **Returns:** a string representing the updated HTML after python tags have
  494. been handled
  495. """
  496. def printf_handler(x):
  497. g = x.groupdict()
  498. return "%s<printf %s>%s</printf>" % (
  499. g.get("lead", ""),
  500. g.get("fmt", None) or "%s",
  501. g["body"],
  502. )
  503. text = re.sub(PYVAR_REGEX, printf_handler, text)
  504. text = re.sub(PYTHON_REGEX, _make_python_handler(context, text), text)
  505. return text.replace(r"\@{", "@{")
  506. def handle_custom_tags(context, text):
  507. """
  508. Process custom HTML tags
  509. This function begins by calling `cs_course_handle_custom_tags` on the input
  510. text, so that courses can implement their own custom HTML tags. This
  511. function is responsible for handling the following custom tags:
  512. * `<chapter>`, `<section>`, `<subsection>`, etc.
  513. * `<chapter*>`, `<section*>`, etc.
  514. * `<ref>`
  515. * `<tableofcontents/>`
  516. * `<footnote>`
  517. * `<showhide>`
  518. * `<math>` and `<displaymath>`
  519. It also takes care of making sure links, images, etc are referencing real
  520. URLs instead of internal URLs, and also for making sure that syntax
  521. highlighting is approprtiately applied for code snippets.
  522. It is not responsible for handling Python tags or includes (which are
  523. handled elsewhere, before this function is invoked).
  524. **Parameters:**
  525. * `context`: the context associated with this request
  526. * `text`: a string containing the raw HTML source of the page, after
  527. running through the handler
  528. **Returns:** a string representing the updated HTML after custom tags have
  529. been handled
  530. """
  531. if "cs_course_handle_custom_tags" in context:
  532. text = context["cs_course_handle_custom_tags"](text)
  533. section = r"((?:chapter)|(?:(?:sub){0,2}section))"
  534. section_star = r"<(?P<tag>%s)\*>(?P<body>.*?)</(?P=tag)\*?>" % section
  535. section_star = re.compile(section_star, re.MULTILINE | re.DOTALL | re.IGNORECASE)
  536. tag_map = {
  537. "section": ("h2", 1),
  538. "subsection": ("h3", 2),
  539. "subsubsection": ("h4", 3),
  540. }
  541. def _section_star_matcher(x):
  542. d = x.groupdict()
  543. t = d["tag"].rstrip("*")
  544. b = d["body"]
  545. t = tag_map[t][0]
  546. return "<%s>%s</%s>" % (t, b, t)
  547. text = re.sub(section_star, _section_star_matcher, text)
  548. tree = BeautifulSoup(text, "html.parser")
  549. # handle sections, etc.
  550. labels = {}
  551. textsections = [0, 0, 0]
  552. chapter = None
  553. toc_sections = []
  554. all_title_links = set()
  555. for i in tree.find_all(re.compile(section)):
  556. if i.name == "chapter":
  557. chapter = i.attrs.get("num", "0")
  558. tag = "h1"
  559. num = str(chapter)
  560. else:
  561. if i.name == "section":
  562. textsections[0] += 1
  563. textsections[1] = 0
  564. elif i.name == "subsection":
  565. textsections[1] += 1
  566. textsections[2] = 0
  567. elif i.name == "subsubsection":
  568. textsections[2] += 1
  569. tag, lim = tag_map[i.name]
  570. to_num = textsections[:lim]
  571. if chapter is not None:
  572. to_num.insert(0, chapter)
  573. num = ".".join(map(str, to_num))
  574. linknum = num.replace(".", "_")
  575. linkname = "catsoop_section_%s" % linknum
  576. title = i.text
  577. linkname_2 = _safe_title(title, all_title_links)
  578. lbl = i.attrs.get("label", None)
  579. if lbl is not None:
  580. labels[lbl] = {
  581. "type": i.name,
  582. "number": num,
  583. "title": i.decode_contents(),
  584. "link": "#%s" % linkname_2,
  585. }
  586. toc_sections.append((num, linkname_2, i))
  587. sec = copy.copy(i)
  588. sec.name = tag
  589. sec["class"] = "cs_section_title"
  590. sec.insert(0, "%s) " % num)
  591. if lbl is not None:
  592. sec.attrs["id"] = "catsoop_label_%s" % lbl
  593. i.replace_with(sec)
  594. if context.get("cs_show_section_permalinks", False):
  595. permalink = tree.new_tag("a")
  596. permalink["class"] = "cs_permalink"
  597. permalink.attrs["href"] = "#%s" % linkname_2
  598. permalink.string = "§"
  599. sec.append(permalink)
  600. # references
  601. link = tree.new_tag("a")
  602. link["class"] = "anchor"
  603. link.attrs["name"] = linkname
  604. sec.insert_before(link)
  605. link = tree.new_tag("a")
  606. link["class"] = "anchor"
  607. link.attrs["name"] = linkname_2
  608. sec.insert_before(link)
  609. # handle refs
  610. for i in tree.find_all("ref"):
  611. if "label" not in i.attrs:
  612. lbl = list(i.attrs.keys())[0]
  613. else:
  614. lbl = i.attrs["label"]
  615. body = i.decode_contents().strip() or '<a href="{link}">{type} {number}</a>'
  616. body = body.format(**labels[lbl])
  617. new = BeautifulSoup(body, "html.parser")
  618. i.replace_with(new)
  619. # handle table of contents
  620. for ix, i in enumerate(tree.find_all("tableofcontents")):
  621. o_toc_dom = toc_dom = tree.new_tag("ul")
  622. last_handled_len = 0
  623. for (num, ref, elt) in toc_sections:
  624. n = len(num.strip().split(".")) # number of layers deep
  625. if n > last_handled_len and last_handled_len != 0:
  626. # want a new level of indentation
  627. ltoc_dom = toc_dom
  628. toc_dom = tree.new_tag("ul")
  629. ltoc_dom.append(toc_dom)
  630. while n < last_handled_len:
  631. toc_dom = toc_dom.parent
  632. last_handled_len -= 1
  633. last_handled_len = n
  634. toc_entry = tree.new_tag("li")
  635. link = copy.copy(elt)
  636. link.name = "a"
  637. link["href"] = "#%s" % ref
  638. link.insert(0, "%s) " % num)
  639. toc_entry.append(link)
  640. toc_dom.append(toc_entry)
  641. toc_sec = tree.new_tag("h2")
  642. toc_sec.string = "Table of Contents"
  643. i.replace_with(toc_sec)
  644. toc_sec.insert_after(o_toc_dom)
  645. # footnotes
  646. footnotes = []
  647. for ix, i in enumerate(tree.find_all("footnote")):
  648. jx = ix + 1
  649. footnotes.append(i.decode_contents())
  650. sup = tree.new_tag("sup")
  651. sup.string = str(jx)
  652. i.replace_with(sup)
  653. link = tree.new_tag("a", href="#catsoop_footnote_%d" % jx)
  654. sup.wrap(link)
  655. ref = tree.new_tag("a")
  656. ref.attrs["name"] = "catsoop_footnote_ref_%d" % jx
  657. ref["class"] = "anchor"
  658. link.insert_before(ref)
  659. if len(footnotes) == 0:
  660. fnote = ""
  661. else:
  662. fnote = '<br/>&nbsp;<hr/><b name="cs_footnotes">Footnotes</b>'
  663. for (ix, f) in enumerate(footnotes):
  664. ix = ix + 1
  665. fnote += (
  666. '<p><a class="anchor" name="catsoop_footnote_%d"></a><sup style="padding-right:0.25em;color:var(--cs-base-bg-color);">%d</sup>'
  667. '%s <a href="#catsoop_footnote_ref_%d">'
  668. '<span class="noprint">(click to return to text)</span>'
  669. "</a></p>"
  670. ) % (ix, ix, f, ix)
  671. if not context.get("cs_footnotes", ""):
  672. context["cs_footnotes"] = fnote
  673. # hints (<showhide>)
  674. def _md5(x):
  675. return hashlib.md5(x.encode()).hexdigest()
  676. for ix, i in enumerate(tree.find_all("showhide")):
  677. i.name = "div"
  678. i.attrs["style"] = "display:none;"
  679. contents = i.decode_contents()
  680. i.clear()
  681. i.append(
  682. BeautifulSoup(source_transform_string(context, contents), "html.parser")
  683. )
  684. wrap = tree.new_tag("div")
  685. wrap["class"] = ["response"]
  686. i.wrap(wrap)
  687. button = tree.new_tag(
  688. "button",
  689. onclick="if(this.nextSibling.style.display === 'none'){this.nextSibling.style.display = 'block';}else{this.nextSibling.style.display = 'none';}",
  690. )
  691. button.string = "Show/Hide"
  692. i.insert_before(button)
  693. # custom URL handling in img, a, script, link
  694. URL_FIX_LIST = [("img", "src"), ("a", "href"), ("script", "src"), ("link", "href")]
  695. for (tag, field) in URL_FIX_LIST:
  696. for i in tree.find_all(tag):
  697. if field in i.attrs:
  698. i.attrs[field] = dispatch.get_real_url(context, i.attrs[field])
  699. # math tags
  700. handle_math_tags(tree)
  701. # code blocks: specific default behavior
  702. default_code_class = context.get("cs_default_code_language", "nohighlight")
  703. if default_code_class is not None:
  704. for i in tree.find_all("code"):
  705. if i.parent.name != "pre":
  706. continue
  707. if "class" in i.attrs and (
  708. isinstance(i.attrs["class"], str) or len(i.attrs["class"]) > 0
  709. ):
  710. # this already has a class; skip!
  711. continue
  712. i.attrs["class"] = [default_code_class]
  713. return str(tree)
  714. def handle_math_tags(tree):
  715. """
  716. Handles `<math>` and `<displaymath>` tags, replacing them with `<span>` and
  717. `<div>` elements with appropriate classes so the Javascript math renderer
  718. can find them.
  719. **Parameters:**
  720. * `context`: the context associated with this request
  721. * `text`: a string containing the raw HTML source of the page
  722. **Returns:** a string representing the updated HTML after math tags have
  723. been handled
  724. """
  725. for i in tree.find_all(re.compile("(?:display)?math")):
  726. i["class"] = i.get("class", [])
  727. if i.name == "math":
  728. i.name = "span"
  729. else:
  730. i.name = "div"
  731. i.attrs["style"] = "text-align:center;padding-bottom:10px;"
  732. i["class"].append("cs_displaymath")
  733. i["class"].append("cs_math_to_render")
  734. return tree