CAT-SOOP is a flexible, programmable learning management system based on the Python programming language. https://catsoop.mit.edu
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

1005 lines
33 KiB

  1. # This file is part of CAT-SOOP
  2. # Copyright (c) 2011-2020 by The CAT-SOOP Developers <catsoop-dev@mit.edu>
  3. #
  4. # This program is free software: you can redistribute it and/or modify it under
  5. # the terms of the GNU Affero General Public License as published by the Free
  6. # Software Foundation, either version 3 of the License, or (at your option) any
  7. # later version.
  8. #
  9. # This program is distributed in the hope that it will be useful, but WITHOUT
  10. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  12. # details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. """
  17. Handling of the CAT-SOOP specification language(s): Markdown, XML, and Python
  18. The real goal of parsing of a page's source is to convert it back to the
  19. original Python specification format. Markdown is translated to XML, which is
  20. translated to Python. The overall flow when parsing a page is:
  21. 1. If the content file is in Markdown, parse it down to HTML.
  22. 2. If the content file was in Markdown or XML, parse it down to Python
  23. (stripping out comments and separating &lt;question&gt; tags into
  24. appropriate calls to `catsoop.tutor.question`).
  25. """
  26. import os
  27. import re
  28. import ast
  29. import sys
  30. import copy
  31. import json
  32. import random
  33. import string
  34. import hashlib
  35. import traceback
  36. from io import StringIO
  37. from collections import OrderedDict
  38. from . import tutor
  39. from . import dispatch
  40. from . import markdown_math
  41. from .errors import html_format, clear_info
  42. import markdown
  43. from markdown.extensions import tables
  44. from markdown.extensions import fenced_code
  45. from markdown.extensions import sane_lists
  46. from bs4 import BeautifulSoup
  47. from unidecode import unidecode
  # NOTE(review): presumably the module-level names that should be left out of
  # generated documentation (inferred from the name `_nodoc`; confirm against
  # the documentation tooling)
  _nodoc = {
      "BeautifulSoup",
      "OrderedDict",
      "StringIO",
      "clear_info",
      "html_format",
      "PYTHON_REGEX",
      "PYVAR_REGEX",
      "remove_common_leading_whitespace",
      "source_formats",
      "source_format_string",
  }

  # HTML snippet that `xml_pre_handle` emits in place of a <question> tag that
  # cannot be split into a type and a body
  _malformed_question = "<font color='red'>malformed <tt>question</tt></font>"

  # a question name starts with an ASCII letter and continues with ASCII
  # letters, digits, or underscores
  _valid_qname = re.compile(r"^[A-Za-z][_A-Za-z0-9]*$")

  # matches any single character that is not an ASCII letter, digit, or
  # underscore (used by `_safe_title`)
  _unsafe_title = re.compile(r"[^A-Za-z0-9_]")
  def _md5(x):
      """
      Return the hexadecimal MD5 digest of the string `x`, encoded as UTF-8.
      """
      digest = hashlib.md5()
      digest.update(x.encode("utf-8"))
      return digest.hexdigest()
  def _safe_title(t, disallowed=None):
      """
      Build an identifier-like anchor name from the title `t`.

      The title is transliterated with `unidecode`, every character other than
      an ASCII letter, digit, or underscore becomes an underscore, runs of
      underscores are collapsed, the result is lowercased and stripped of
      surrounding underscores, and a single leading underscore is added.

      **Parameters:**

      * `t`: the title to convert

      **Optional Parameters:**

      * `disallowed` (default `None`): a set of names that are already taken.
          If the generated name is in this set, `_2`, `_3`, ... is appended until
          it is not.  The chosen name is added to the set.

      **Returns:** the generated name
      """
      if disallowed is None:
          disallowed = set()
      ascii_text = unidecode(t)
      collapsed = re.sub(r"_+", "_", _unsafe_title.sub("_", ascii_text))
      base = "_%s" % (collapsed.lower().strip("_"))
      candidate = base
      suffix = 2
      while candidate in disallowed:
          candidate = "%s_%d" % (base, suffix)
          suffix += 1
      disallowed.add(candidate)
      return candidate
  def xml_pre_handle(context):
      """
      Translate the value in `cs_content` from XML to Python, storing the result
      as `cs_problem_spec` in the given context.

      `<comment>` environments are stripped and the source is split around
      `<question ...>` tags.  The body of each question tag is executed as Python
      in a copy of the context, and the resulting environment is passed to
      `catsoop.tutor.question`; the value it returns takes the tag's place in the
      output list.  Text between questions is kept as plain strings.

      A tag that cannot be split into a type and a body is replaced by an error
      marker, and processing stops at that point.  An exception raised while
      handling a question is rendered into the output as an HTML error message.

      **Parameters:**

      * `context`: the context associated with this request (from which
          `cs_content` is taken)

      **Returns:** `None`
      """
      text = context["cs_content"]
      text = re.sub(_environment_matcher("comment"), "", text)

      tmp = text.split("<question")
      qcount = 0
      o = [tmp[0]]
      for piece in tmp[1:]:
          # everything up to the first ">" is the question type
          chunks = piece.strip().split(">", 1)
          if len(chunks) != 2:
              o.append(_malformed_question)
              break
          type_, rest = chunks
          otherrest = rest.split("</question>", 1)
          if len(otherrest) != 2:
              o.append(_malformed_question)
              break
          code, rest = otherrest
          # the question's code runs in its own copy of the context
          e = dict(context)
          try:
              code = remove_common_leading_whitespace(code)
              if isinstance(code, int):
                  # an int is the index of the first inconsistently-indented line
                  o.append(
                      (
                          "<div><font color='red'><b>A Python Error Occurred:</b></font>"
                          "<p><pre>"
                          "Inconsistent indentation on line %d of question tag"
                          "</pre></p></div>"
                      )
                      % code
                  )
                  o.append(rest)
                  continue
              exec(code, e)
              if "csq_name" not in e:
                  # unnamed questions get sequential default names
                  e["csq_name"] = "q%06d" % qcount
                  qcount += 1
              if _valid_qname.match(e["csq_name"]):
                  # questions of type "dummy" are executed but produce no output
                  if type_ != "dummy":
                      o.append(tutor.question(context, type_, **e))
              else:
                  o.append(
                      (
                          '<div class="question">'
                          '<font color="red">'
                          "ERROR: Invalid question name <code>%r</code>"
                          "</font></div>"
                      )
                      % e["csq_name"]
                  )
          except BaseException:
              # catch-all: anything raised while running the author's code
              # (including SystemExit) is rendered into the page instead of
              # propagating
              exc_type, exc_value, exc_tb = sys.exc_info()
              tb_entries = traceback.extract_tb(exc_tb)
              _fname, lineno, func, _line = tb_entries[-1]
              exc_only = traceback.format_exception_only(exc_type, exc_value)
              if issubclass(exc_type, SyntaxError):
                  # issubclass so that IndentationError and TabError are reported
                  # the same way as a plain SyntaxError
                  tb_text = "Syntax error in question tag:\n"
              elif func == "<module>":
                  # the error happened at the top level of the question's code
                  tb_text = "Error on line %d of question tag." % lineno
                  try:
                      tb_text += "\n %s\n\n" % code.splitlines()[lineno - 1].strip()
                  except Exception:
                      # showing the offending line is best-effort only
                      pass
              else:
                  tb_text = context["csm_errors"].error_message_content(
                      context, html=False
                  )
                  exc_only = [""]
              tb_text = "".join([tb_text] + exc_only)

              err = html_format(clear_info(context, tb_text))
              ret = (
                  "<div><font color='red'>"
                  "<b>A Python Error Occurred:</b>"
                  "<p><pre>%s</pre><p>"
                  "</font></div>"
              ) % err
              o.append(ret)
          o.append(rest)
      context["cs_problem_spec"] = o
  def _md(x):
      """
      Render the Markdown source `x` to HTML, with the table, fenced-code,
      sane-list, and math extensions enabled.
      """
      enabled_extensions = [
          tables.TableExtension(),
          fenced_code.FencedCodeExtension(),
          sane_lists.SaneListExtension(),
          markdown_math.MathExtension(),
      ]
      return markdown.markdown(x, extensions=enabled_extensions)
  def md_pre_handle(context, xml=True):
      """
      Translate the value in `cs_content` from Markdown to HTML.

      `<comment>` environments are removed first; the rendered HTML is stored
      back into `cs_content`.

      **Parameters:**

      * `context`: the context associated with this request (from which
          `cs_content` is taken)

      **Optional Parameters:**

      * `xml` (default `True`): whether `catsoop.language.xml_pre_handle` should
          be invoked after translating to HTML

      **Returns:** `None`
      """
      without_comments = _environment_matcher("comment").sub("", context["cs_content"])
      context["cs_content"] = _md_format_string(context, without_comments, False)
      if not xml:
          return
      xml_pre_handle(context)
  def py_pre_handle(context):
      """
      'Pre-handler' for Python sources.

      Present only so that Python sources expose the same interface as
      `md_pre_handle` and `xml_pre_handle`; it performs no work and leaves the
      context untouched.

      **Parameters:**

      * `context`: the context associated with this request (from which
          `cs_content` is taken)

      **Returns:** `None`
      """
      return None
  # a run of five or more asterisks marks the top (and bottom) border of a diagram
  DIAGRAM_START = re.compile(r"\*{5}\**")


  def _replace_diagrams(src):
      """
      Find asterisk-bordered diagrams in `src` and replace each with a
      placeholder `<div>`.

      A diagram starts on a line containing a run of at least five asterisks and
      ends on a later line that has the same run in the same columns; every line
      in between must have an asterisk in the run's first column.  The candidate
      is accepted if either every row has an asterisk in the run's last column
      with at least one character after it, or no row has non-whitespace text to
      the right of the run.

      **Parameters:**

      * `src`: the source text to search

      **Returns:** a tuple `(new_src, diagrams)`, where `new_src` is the text
      with diagrams replaced by placeholders and `diagrams` is a dictionary
      mapping the MD5 hash of each diagram's source to that source (empty if no
      diagram was found)
      """
      if not DIAGRAM_START.search(src):
          # try to short-circuit; this is probably faster than splitting and
          # looping in the case where we have no diagrams.  (an empty dict keeps
          # the return type the same as on the normal path.)
          return src, {}

      ix = 0
      lines = src.splitlines(keepends=True)
      diagrams = {}
      while ix < len(lines):
          match = DIAGRAM_START.search(lines[ix])
          if not match:
              ix += 1
              continue

          # if we're here, we found something that looks like the start of a
          # diagram.  look for a matching bottom border.
          firstline = ix
          firstix, lastix = match.span()
          group = match.group(0)
          lastline = None  # exclusive index of the last row, once found
          for jx in range(firstline + 1, len(lines)):
              candidate = lines[jx]
              if firstix >= len(candidate) or candidate[firstix] != "*":
                  # no * on the left-hand side; this must not have been a
                  # diagram after all.
                  break
              if candidate[firstix:lastix] == group:
                  # a full row of *'s in the same columns closes the diagram
                  lastline = jx + 1
                  break

          # make sure we've got either a solid border of *'s, or an open
          # right-hand side with nothing beyond the right-most asterisk
          if lastline is not None:
              all_closed = True
              trailing_text = False
              leading_text = False
              for row in range(firstline, lastline):
                  post = lines[row][lastix:]
                  if post and not post.isspace():
                      trailing_text = True
                  pre = lines[row][:firstix]
                  if pre and not pre.isspace():
                      leading_text = True
                  if lastix >= len(lines[row]) or lines[row][lastix - 1] != "*":
                      all_closed = False

              if all_closed or not trailing_text:
                  # we found a diagram.  remove it from the text and leave a
                  # placeholder that identifies it by the hash of its source.
                  alignment = "center"
                  if leading_text:
                      alignment = "floatright"
                  elif trailing_text:
                      alignment = "floatleft"

                  diagram_source = []
                  # drop the right-hand border column only if there is one
                  term = lastix - 1 if all_closed else lastix
                  for row in range(firstline, lastline):
                      if row != firstline and row != lastline - 1:
                          # interior rows (not the top/bottom border) hold the
                          # diagram's content
                          diagram_source.append(
                              lines[row].rstrip("\n")[firstix + 1 : term]
                          )
                      # keep only the text to the left and right of the diagram
                      lines[row] = (
                          "%s%s" % (lines[row][:firstix], lines[row][lastix:])
                      ).rstrip() + "\n"

                  this_source = "\n".join(diagram_source)
                  hash_ = _md5(this_source)
                  tag = (
                      '<div class="cs-diagram-source" diagramalign="%s">Placeholder for Diagram <code class="cs-diagram-id">%s</code></div>\n'
                      % (alignment, hash_,)
                  )
                  diagrams[hash_] = this_source
                  lines.insert(firstline, tag)
                  # the insertion shifted everything down by one, so together
                  # with the increment below this resumes just past the diagram
                  ix = lastline
          ix += 1
      return "".join(lines), diagrams
  def _md_format_string(context, s, xml=True):
      """
      Render the Markdown string `s` to HTML.

      The contents of certain tags (`pre`, `question`, `math`/`displaymath`,
      `script`, `showhide`, plus any listed in `cs_markdown_ignore_tags`) are
      swapped out for a random placeholder before rendering and restored
      afterwards, so Markdown does not touch them.  Diagrams are replaced by
      placeholders, and their sources are appended in a `<script>` element.

      **Parameters:**

      * `context`: the context associated with this request
      * `s`: the Markdown source

      **Optional Parameters:**

      * `xml` (default `True`): whether the result should additionally be passed
          through `_xml_format_string`

      **Returns:** the resulting HTML string
      """
      # pick a random placeholder that does not occur in the input
      while True:
          splitter = "".join(random.choice(string.ascii_letters) for i in range(20))
          if splitter not in s:
              break

      # pull protected tags out of the text, leaving the placeholder behind
      protected = []

      def _stash(found):
          protected.append(found.groups())
          return splitter

      extra_tags = context.get("cs_markdown_ignore_tags", tuple())
      tag_names = ("pre", "question", "(?:display)?math", "script", "showhide") + tuple(
          extra_tags
      )
      protected_tag = re.compile(
          r"<(%s)(.*?)>(.*?)</\1>" % "|".join(tag_names), re.MULTILINE | re.DOTALL
      )
      stripped = protected_tag.sub(_stash, s)

      # diagrams first, then Markdown proper
      stripped, diagram_sources = _replace_diagrams(stripped)
      rendered = _md(stripped)

      # stitch the protected tags back in, in order
      rebuilt = []
      for index, piece in enumerate(rendered.split(splitter)):
          rebuilt.append(piece)
          if index < len(protected):
              name, attrs, inner = protected[index]
              rebuilt.append("<%s%s>%s</%s>" % (name, attrs, inner, name))
      text = "".join(rebuilt)

      # drop the outermost paragraph wrapper
      if text.startswith("<p>") and text.endswith("</p>"):
          text = text[3:-4]

      if diagram_sources:
          assignments = [
              "catsoop.diagram_sources[%s] = %s;" % (json.dumps(key), json.dumps(value))
              for key, value in diagram_sources.items()
          ]
          script = "\n".join(assignments)
          text = '%s<script type="text/javascript">%s</script>' % (text, script)

      if xml:
          return _xml_format_string(context, text)
      return text
  def _xml_format_string(context, s):
      """
      Format the XML/HTML string `s` by running it through
      `handle_custom_tags`, and return the result.
      """
      handled = handle_custom_tags(context, s)
      return handled
  # "catsoop" and "md" sources share the Markdown pre-handler
  source_formats = OrderedDict(
      [
          ("catsoop", md_pre_handle),
          ("md", md_pre_handle),
          ("xml", xml_pre_handle),
          ("py", py_pre_handle),
      ]
  )
  """OrderedDict mapping source format names to formatting handlers"""

  # string formatters, looked up by `source_transform_string`; "xml" and "py"
  # sources only get custom-tag handling, with no Markdown rendering
  source_format_string = OrderedDict(
      [
          ("catsoop", _md_format_string),
          ("md", _md_format_string),
          ("xml", _xml_format_string),
          ("py", _xml_format_string),
      ]
  )
  """OrderedDict mapping source format names to formatters"""
  def source_transform_string(context, s):
      """
      Convert the given string to HTML, based on the syntax associated with the
      type of the current content file.

      The formatter is looked up in `source_format_string` under the context's
      `cs_source_format`: Markdown sources are rendered to HTML and have custom
      tags handled, while XML and Python sources only have custom tags handled.
      If `cs_source_format` is missing or `None`, the string is returned
      unchanged.

      **Parameters:**

      * `context`: the context associated with this request
      * `s`: the string to be translated to HTML

      **Returns:** the translated string
      """
      src_format = context.get("cs_source_format", None)
      if src_format is None:
          return s
      formatter = source_format_string[src_format]
      return formatter(context, s)
  368. # Handling of custom XML tags
  def _environment_matcher(tag):
      """
      Return a compiled, case-insensitive regular expression matching
      `<tag>...</tag>` (non-greedy, possibly spanning lines), with the enclosed
      text available as the named group `body`.
      """
      pattern = "<%s>(?P<body>.*?)</%s>" % (tag, tag)
      flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
      return re.compile(pattern, flags)
  # a printf-style conversion spec without its leading "%": flags, width,
  # optional precision, optional length modifier, and a conversion character
  # NOTE(review): the "." in `(?:.\d+)?` is unescaped, so it matches any
  # character rather than only a literal dot -- confirm whether that is intended
  _matcher = r"[\#0\- +]*\d*(?:.\d+)?[hlL]?[diouxXeEfFgGcrs]"
  # the spec may be written with or without the leading "%", or left out entirely
  _matcher = r"(?:%%%s|%s)?" % (_matcher, _matcher)
  # "@", not preceded by a backslash, followed by the optional format spec and a
  # brace-delimited body
  _pyvar_matcher = r"(?P<lead>^|[^\\])@(?P<fmt>%s){(?P<body>.+?)}" % _matcher
  PYVAR_REGEX = re.compile(_pyvar_matcher, re.DOTALL | re.IGNORECASE)
  """Regular expression for matching `@{}` syntax"""

  # groups: `tag` ("python" or "printf"), `opts` (the text after the tag name),
  # and `body` (everything up to the matching closing tag)
  PYTHON_REGEX = re.compile(
      r"""<(?P<tag>python|printf) *(?P<opts>.*?)>(?P<body>.*?)</(?P=tag)>""",
      re.MULTILINE | re.DOTALL | re.IGNORECASE,
  )
  """Regular expression for matching &lt;python&gt; tags"""
  def remove_common_leading_whitespace(x):
      """
      Strip the indentation of the first non-blank line from every line of `x`.

      **Parameters:**

      * `x`: a string containing (possibly indented) source code

      **Returns:** the de-indented text, with lines joined by `"\\n"`.  If some
      non-blank line at or after the first non-blank line does not start with
      that indentation, the (0-based) index of the first such line is returned
      as an `int` instead; callers use this to report inconsistent indentation.
      An input with no lines yields `""`.
      """
      lines = x.splitlines()
      if not lines:
          return ""

      # index of the first non-blank line (the last line if all are blank)
      first_ix = next(
          (n for n, line in enumerate(lines) if line.strip()), len(lines) - 1
      )

      found = _indent_regex.match(lines[first_ix])
      if found is None:
          return x
      prefix = found.group(1)

      # every later non-blank line must share that prefix
      for n in range(first_ix, len(lines)):
          line = lines[n]
          if line.strip() and not line.startswith(prefix):
              return n

      width = len(prefix)
      return "\n".join(line[width:] for line in lines)
  # captures the (possibly empty) run of whitespace at the start of a string
  _indent_regex = re.compile(r"^(\s*)")


  def _tab_replacer(x):
      """
      Given a match of `_indent_regex`, return the captured leading whitespace
      with every tab replaced by a space.
      """
      leading = x.group(1)
      return leading.replace("\t", " ")


  def _replace_indentation_tabs(x):
      """
      Return `x` with the tabs in its leading whitespace replaced by spaces;
      tabs elsewhere in the string are left alone.
      """
      return _indent_regex.sub(_tab_replacer, x)
  # matches a Python string literal, honoring backslash escapes.  the four
  # alternatives are, in order: triple-double-quoted, triple-single-quoted,
  # single-quoted, and double-quoted; the last two may not contain a newline.
  # the whole literal (quotes included) is captured as group 1.
  _string_regex = re.compile(
      r"""(\"\"\"[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*\"\"\"|'''[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''|'[^\n'\\]*(?:\\.[^\n'\\]*)*'|"[^\n"\\]*(?:\\.[^\n"\\]*)*")""",
      re.MULTILINE | re.DOTALL,
  )
  def indent_code(c):
      """
      Indent every line of the code `c` by one level and put a `pass` statement
      in front of it, so that the result can be used as the body of a compound
      statement.

      String literals are temporarily swapped for random placeholders so that
      the contents of multi-line strings are not re-indented; tabs in each
      line's leading whitespace are replaced via `_replace_indentation_tabs`.

      **Parameters:**

      * `c`: a string containing Python source code

      **Returns:** the indented code
      """
      stashed = {}

      def _stash(found):
          # choose a placeholder that is neither already used nor in the source
          while True:
              token = "".join(random.choice(string.ascii_letters) for i in range(20))
              if token not in stashed and token not in c:
                  break
          stashed[token] = found.group(1)
          return token

      # take string literals out of the way
      without_strings = _string_regex.sub(_stash, c)

      # shift every line to the right, then prepend the `pass`
      shifted = [
          " %s" % _replace_indentation_tabs(line)
          for line in without_strings.splitlines()
      ]
      result = " pass\n%s" % "\n".join(shifted)

      # put the string literals back
      for token, literal in stashed.items():
          result = result.replace(token, literal)
      return result
  def get_python_output(context, code, variables, line_offset=0):
      """
      Helper function.  Evaluate code in the given environment, and return its
      output, if any.

      Makes use of a special variable `cs___WEBOUT`, which is a file-like
      object.  Any data written to `cs___WEBOUT` will be returned.  Overwrites
      `print` in the given environment while the code runs so that it outputs to
      `cs___WEBOUT` instead of to stdout.

      Before execution, `tutor.init_random()` is rewritten to
      `tutor.init_random(globals())` and `tutor.question(` to
      `tutor.question(globals(),`.

      **Parameters:**

      * `context`: the context associated with this request
      * `code`: a string containing the Python code to be executed
      * `variables`: a dictionary representing the environment in which the code
          should be executed (it is modified in place)

      **Optional Parameters**:

      * `line_offset` (default `0`): the offset, in lines, of this code's
          &lt;python&gt; tag from the top of the source file; used in case an error
          occurs, to try to point authors to the right location in the original
          source file

      **Returns:** a string containing any values written to `cs___WEBOUT`, or an
      HTML error message if the code could not be run or raised an exception
      """
      # number of lines that precede the author's first line in the wrapped
      # code: the 8-line preamble below, plus the `pass` added by `indent_code`
      wrapper_lines = 9

      variables.update({"cs___WEBOUT": StringIO()})
      try:
          code = remove_common_leading_whitespace(code)
          if isinstance(code, int):
              # an int is the index of the first inconsistently-indented line
              return (
                  "<div><font color='red'><b>A Python Error Occurred:</b></font>"
                  "<p><pre>"
                  "Inconsistent indentation on line %d of python tag (line %d of source)"
                  "</pre></p></div>"
              ) % (code, code + line_offset + 1)
          code = indent_code(code)
          code = (
              (
                  "_cs_oprint = print\n"
                  "def myprint(*args, **kwargs):\n"
                  # the body of the `if` must be indented deeper than the `if`
                  # itself, or the wrapper does not compile
                  '    if "file" not in kwargs:\n'
                  '        kwargs["file"] = cs___WEBOUT\n'
                  "    _cs_oprint(*args, **kwargs)\n"
                  "print = myprint\n"
                  "try:\n\n"
              )
              + code
              + (
                  "\nexcept Exception as e:\n"
                  " raise e\n"
                  "finally:\n"
                  " print = _cs_oprint"
              )
          )
          code = code.replace("tutor.init_random()", "tutor.init_random(globals())")
          code = code.replace("tutor.question(", "tutor.question(globals(),")
          exec(code, variables)
          return variables["cs___WEBOUT"].getvalue()
      except BaseException:
          # catch-all: anything raised while running the author's code
          # (including SystemExit) is rendered into the page instead of
          # propagating
          exc_type, exc_value, exc_tb = sys.exc_info()
          tb_entries = traceback.extract_tb(exc_tb)
          _fname, lineno, func, _line = tb_entries[-1]
          exc_only = traceback.format_exception_only(exc_type, exc_value)
          if issubclass(exc_type, SyntaxError):
              # issubclass so that IndentationError and TabError are reported
              # the same way as a plain SyntaxError
              tb_text = "Syntax error in Python tag:\n"

              def lineno_replacer(x):
                  # translate a line number in the wrapped code back to a line
                  # number within the tag
                  return "line %d" % (int(x.group(1)) - wrapper_lines)

              # `(\d+)` captures the whole number (a repeated one-digit group
              # would only keep its last digit)
              exc_only = [re.sub(r"line (\d+)", lineno_replacer, i) for i in exc_only]
          elif func == "<module>":
              # the error happened at the top level of the author's code
              tb_text = (
                  "Error on line %d of Python tag (line %d of source):\n %s\n\n"
                  % (
                      lineno - wrapper_lines,
                      lineno - wrapper_lines + line_offset + 1,
                      code.splitlines()[lineno - 1].strip(),
                  )
              )
          else:
              tb_text = context["csm_errors"].error_message_content(context, html=False)
              exc_only = [""]
          tb_text = "".join([tb_text] + exc_only)

          err = html_format(clear_info(context, tb_text))
          ret = (
              "<div><font color='red'>"
              "<b>A Python Error Occurred:</b>"
              "<p><pre>%s</pre><p>"
              "</font></div>"
          ) % (err,)
          return ret
  def _make_python_handler(context, fulltext):
      """
      Build the replacement callback used with `PYTHON_REGEX`.

      **Parameters:**

      * `context`: the context associated with this request; a
          `cs__python_envs` dictionary is created in it if not already present
      * `fulltext`: the complete text being processed, used to estimate the line
          on which each tag starts

      **Returns:** a function that takes a match of `PYTHON_REGEX` and returns
      the string that should replace it.  The options `show`, `norun`,
      `noresult`, and `env=NAME` control whether the code is displayed, whether
      it is run, whether its output is included, and in which named environment
      it runs (the context itself by default).
      """
      if "cs__python_envs" not in context:
          context["cs__python_envs"] = {}

      def python_tag_handler(match):
          groups = match.groupdict()
          # number of newlines before the tag, i.e., its 0-based line
          guess_line = fulltext[: match.start()].count("\n")
          opts = (groups["opts"] or "").strip().split(" ")
          body = groups["body"]

          if groups["tag"] == "printf":
              # <printf FMT>EXPR</printf> is shorthand for printing one value;
              # its only "option" is the format string
              fmt = "%s" if (len(opts) == 1 and opts[0] == "") else opts[0]
              body = "print(%r %% (%s,))" % (fmt, body)
              opts = []

          shown = ""
          # decide whether to show the code
          if "show" in opts:
              opts.remove("show")
              template = '<pre><code class="lang-python">%s</code></pre>'
              shown += template % html_format(body)

          # decide whether to run the code
          if "norun" in opts:
              return shown.strip()

          # decide in which environment the code should be run (if several
          # env= options are given, the last one wins)
          execcontext = context
          for opt in opts:
              if not opt.startswith("env="):
                  continue
              envname = opt.partition("=")[2]
              execcontext = context["cs__python_envs"].setdefault(envname, {})

          # run the code
          code_result = get_python_output(context, body, execcontext, guess_line)

          # decide whether to show the result
          if "noresult" in opts:
              return shown.strip()
          return (shown + code_result).strip()

      return python_tag_handler
  def handle_includes(context, text):
      """
      Handles all `<include>` tags in the provided text, replacing them with the
      contents of the files they reference.

      Each non-blank line in the body of an `<include>` tag names one file,
      resolved relative to the directory containing the current content file.
      Files that resolve to a location outside of that directory, and names that
      do not refer to an existing regular file, are silently skipped.  The
      contents of the remaining files are joined with blank lines.

      **Parameters:**

      * `context`: the context associated with this request
      * `text`: a string containing the raw HTML source of the page

      **Returns:** a string representing the updated HTML after includes have
      been handled
      """

      def _is_inside(path, directory):
          # compare whole path components: a character-wise prefix check would
          # also accept siblings such as "<directory>_other/file"
          try:
              common = os.path.commonpath([path, directory])
          except ValueError:
              # raised, e.g., for paths on different drives
              return False
          return os.path.normcase(common) == os.path.normcase(directory)

      def _include_handler(match):
          # paths are resolved relative to the directory of the content file
          base_dir = dispatch.content_file_location(context, context["cs_path_info"])
          base_dir = os.path.realpath(os.path.dirname(base_dir))
          b = match.groupdict()["body"]
          replacements = []
          for fname in b.splitlines():
              fname = fname.strip()
              if not fname:
                  continue  # skip blank lines
              fname = os.path.realpath(os.path.join(base_dir, fname))
              if not _is_inside(fname, base_dir):
                  # tried to escape the content file's directory
                  continue
              if not os.path.isfile(fname):
                  continue
              with open(fname) as f:
                  replacements.append(f.read())
          return "\n\n".join(replacements)

      return re.sub(_environment_matcher("include"), _include_handler, text)
  def handle_python_tags(context, text):
      """
      Process all Python-related custom tags.

      First, every `@{}` expression is rewritten as an equivalent `<printf>`
      tag.  Then each `<python>` and `<printf>` tag is replaced, in order, by
      the output produced by evaluating its code (see
      `catsoop.language.get_python_output`).  Finally, escaped `\\@{` sequences
      are turned into a literal `@{`.

      **Parameters:**

      * `context`: the context associated with this request
      * `text`: a string containing the raw HTML source of the page

      **Returns:** a string representing the updated HTML after python tags have
      been handled
      """

      def _pyvar_to_printf(found):
          lead = found.group("lead")
          fmt = found.group("fmt") or "%s"
          return "%s<printf %s>%s</printf>" % (lead, fmt, found.group("body"))

      expanded = PYVAR_REGEX.sub(_pyvar_to_printf, text)
      tag_handler = _make_python_handler(context, expanded)
      evaluated = PYTHON_REGEX.sub(tag_handler, expanded)
      return evaluated.replace(r"\@{", "@{")
  def handle_custom_tags(context, text):
      """
      Process custom HTML tags

      This function begins by calling `cs_course_handle_custom_tags` on the input
      text, so that courses can implement their own custom HTML tags.  This
      function is responsible for handling the following custom tags:

      * `<chapter>`, `<section>`, `<subsection>`, etc.
      * `<chapter*>`, `<section*>`, etc.
      * `<ref>`
      * `<tableofcontents/>`
      * `<footnote>`
      * `<showhide>`
      * `<math>` and `<displaymath>`

      It also takes care of making sure links, images, etc are referencing real
      URLs instead of internal URLs, and also for making sure that syntax
      highlighting is appropriately applied for code snippets.

      It is not responsible for handling Python tags or includes (which are
      handled elsewhere, before this function is invoked).

      As a side effect, the HTML for the collected footnotes is stored in the
      context as `cs_footnotes`, unless the context already has a non-empty
      value there.

      **Parameters:**

      * `context`: the context associated with this request
      * `text`: a string containing the raw HTML source of the page, after
          running through the handler

      **Returns:** a string representing the updated HTML after custom tags have
      been handled
      """

      if "cs_course_handle_custom_tags" in context:
          text = context["cs_course_handle_custom_tags"](text)

      section = r"((?:chapter)|(?:(?:sub){0,2}section))"
      section_star = r"<(?P<tag>%s)\*>(?P<body>.*?)</(?P=tag)\*?>" % section
      section_star = re.compile(section_star, re.MULTILINE | re.DOTALL | re.IGNORECASE)

      # tag name -> (HTML heading tag, number of section counters that make up
      # its number).  the "chapter" entry is only used for starred tags; numbered
      # chapters are handled separately below.
      tag_map = {
          "chapter": ("h1", 0),
          "section": ("h2", 1),
          "subsection": ("h3", 2),
          "subsubsection": ("h4", 3),
      }

      def _section_star_matcher(x):
          # starred tags become plain, unnumbered headings.  the pattern is
          # case-insensitive, so normalize the name before the lookup.
          d = x.groupdict()
          t = d["tag"].rstrip("*").lower()
          b = d["body"]
          t = tag_map[t][0]
          return "<%s>%s</%s>" % (t, b, t)

      text = re.sub(section_star, _section_star_matcher, text)

      # hints (<showhide>)

      def _showhide_replacer(match):
          body = source_transform_string(context, match.groupdict()["body"])
          out = """<div class="response"><button class="btn-catsoop" onclick="if(this.parentElement.children[1].style.display === 'none'){this.parentElement.children[1].style.display = 'block';}else{this.parentElement.children[1].style.display = 'none';}">Show/Hide</button>\n"""
          out += '<div style="display:none;">%s</div>' % (body,)
          return out + "</div>"

      text = re.sub(_environment_matcher("showhide"), _showhide_replacer, text)

      tree = BeautifulSoup(text, "html.parser")

      # handle sections, etc.

      labels = {}  # label -> information used when resolving <ref> tags
      textsections = [0, 0, 0]  # section, subsection, subsubsection counters
      chapter = None
      toc_sections = []  # (number, anchor name, original element)
      all_title_links = set()  # anchor names already in use

      for i in tree.find_all(re.compile(section)):
          if i.name == "chapter":
              chapter = i.attrs.get("num", "0")
              tag = "h1"
              num = str(chapter)
          else:
              # bump this level's counter and reset the level below it
              if i.name == "section":
                  textsections[0] += 1
                  textsections[1] = 0
              elif i.name == "subsection":
                  textsections[1] += 1
                  textsections[2] = 0
              elif i.name == "subsubsection":
                  textsections[2] += 1
              tag, lim = tag_map[i.name]
              to_num = textsections[:lim]
              if chapter is not None:
                  to_num.insert(0, chapter)
              num = ".".join(map(str, to_num))

          # two anchors per heading: one based on its number, one on its title
          linknum = num.replace(".", "_")
          linkname = "catsoop_section_%s" % linknum
          title = i.text
          linkname_2 = _safe_title(title, all_title_links)

          lbl = i.attrs.get("label", None)
          if lbl is not None:
              labels[lbl] = {
                  "type": i.name,
                  "number": num,
                  "title": i.decode_contents(),
                  "link": "#%s" % linkname_2,
              }
          toc_sections.append((num, linkname_2, i))
          # replace the custom tag with a numbered HTML heading
          sec = copy.copy(i)
          sec.name = tag
          sec["class"] = "cs_section_title"
          sec.insert(0, "%s) " % num)
          if lbl is not None:
              sec.attrs["id"] = "catsoop_label_%s" % lbl
          i.replace_with(sec)

          if context.get("cs_show_section_permalinks", False):
              permalink = tree.new_tag("a")
              permalink["class"] = "cs_permalink"
              permalink.attrs["href"] = "#%s" % linkname_2
              permalink.string = "§"
              sec.append(permalink)

          # references
          link = tree.new_tag("a")
          link["class"] = "anchor"
          link.attrs["name"] = linkname
          sec.insert_before(link)
          link = tree.new_tag("a")
          link["class"] = "anchor"
          link.attrs["name"] = linkname_2
          sec.insert_before(link)

      # handle refs

      for i in tree.find_all("ref"):
          # the label is either the `label` attribute or the name of the first
          # attribute (as in <ref somelabel>)
          if "label" not in i.attrs:
              lbl = list(i.attrs.keys())[0]
          else:
              lbl = i.attrs["label"]

          # the body of the tag, if any, is a format string filled in with the
          # labelled section's information
          body = i.decode_contents().strip() or '<a href="{link}">{type} {number}</a>'
          body = body.format(**labels[lbl])
          new = BeautifulSoup(body, "html.parser")
          i.replace_with(new)

      # handle table of contents

      for ix, i in enumerate(tree.find_all("tableofcontents")):
          o_toc_dom = toc_dom = tree.new_tag("ul")
          last_handled_len = 0
          for (num, ref, elt) in toc_sections:
              n = len(num.strip().split("."))  # number of layers deep
              if n > last_handled_len and last_handled_len != 0:
                  # want a new level of indentation
                  ltoc_dom = toc_dom
                  toc_dom = tree.new_tag("ul")
                  ltoc_dom.append(toc_dom)
              while n < last_handled_len:
                  # climb back up to the list for this depth
                  toc_dom = toc_dom.parent
                  last_handled_len -= 1
              last_handled_len = n
              toc_entry = tree.new_tag("li")
              link = copy.copy(elt)
              link.name = "a"
              link["href"] = "#%s" % ref
              link.insert(0, "%s) " % num)
              toc_entry.append(link)
              toc_dom.append(toc_entry)

          toc_sec = tree.new_tag("h2")
          toc_sec.string = "Table of Contents"
          i.replace_with(toc_sec)
          toc_sec.insert_after(o_toc_dom)

      # footnotes

      footnotes = []

      for ix, i in enumerate(tree.find_all("footnote")):
          jx = ix + 1  # footnotes are numbered from 1
          footnotes.append(i.decode_contents())
          # in the text, the footnote becomes a superscript number linking to
          # the footnote, preceded by an anchor the footnote links back to
          sup = tree.new_tag("sup")
          sup.string = str(jx)
          i.replace_with(sup)
          link = tree.new_tag("a", href="#catsoop_footnote_%d" % jx)
          sup.wrap(link)
          ref = tree.new_tag("a")
          ref.attrs["name"] = "catsoop_footnote_ref_%d" % jx
          ref["class"] = "anchor"
          link.insert_before(ref)

      if len(footnotes) == 0:
          fnote = ""
      else:
          fnote = '<br/>&nbsp;<hr/><b name="cs_footnotes">Footnotes</b>'
          for (ix, f) in enumerate(footnotes):
              ix = ix + 1
              fnote += (
                  '<p><a class="anchor" name="catsoop_footnote_%d"></a><sup style="padding-right:0.25em;color:var(--cs-base-bg-color);">%d</sup>'
                  '%s <a href="#catsoop_footnote_ref_%d">'
                  '<span class="noprint">(click to return to text)</span>'
                  "</a></p>"
              ) % (ix, ix, f, ix)
      # do not overwrite footnotes that are already stored in the context
      if not context.get("cs_footnotes", ""):
          context["cs_footnotes"] = fnote

      # custom URL handling in img, a, script, link

      URL_FIX_LIST = [("img", "src"), ("a", "href"), ("script", "src"), ("link", "href")]

      for (tag, field) in URL_FIX_LIST:
          for i in tree.find_all(tag):
              if field in i.attrs:
                  i.attrs[field] = dispatch.get_real_url(context, i.attrs[field])

      # math tags
      handle_math_tags(tree)

      # code blocks: specific default behavior
      default_code_class = context.get("cs_default_code_language", "nohighlight")
      if default_code_class is not None:
          for i in tree.find_all("code"):
              # only <code> directly inside <pre> is a code block
              if i.parent.name != "pre":
                  continue
              if "class" in i.attrs and (
                  isinstance(i.attrs["class"], str) or len(i.attrs["class"]) > 0
              ):
                  # this already has a class; skip!
                  continue
              i.attrs["class"] = [default_code_class]

      return str(tree)
  def handle_math_tags(tree):
      """
      Handles `<math>` and `<displaymath>` tags, turning them into `<span>` and
      `<div>` elements with appropriate classes so the Javascript math renderer
      can find them.

      An `env` attribute naming an alignment environment (`align`, `align*`,
      `eqnarray`, `eqnarray*`) causes the contents to be wrapped in
      `\\begin{aligned}...\\end{aligned}`; the `env` attribute is removed in any
      case.

      **Parameters:**

      * `tree`: the parsed document (a `BeautifulSoup` tree), which is modified
          in place

      **Returns:** the same `tree` object
      """
      aligned_envs = ["align", "align*", "eqnarray", "eqnarray*"]
      for node in tree.find_all(re.compile("(?:display)?math")):
          node["class"] = node.get("class", [])

          if "env" in node.attrs:
              if node.attrs["env"] in aligned_envs:
                  node.string = "\\begin{aligned}%s\\end{aligned}" % node.string
              # other values of env (namely, equation) are currently ignored
              del node.attrs["env"]

          if node.name == "math":
              # inline math
              node.name = "span"
          else:
              # display math
              node.name = "div"
              node.attrs["style"] = "text-align:center;padding-bottom:10px;"
              node["class"].append("cs_displaymath")
          node["class"].append("cs_math_to_render")
      return tree
  819. pass
  820. if i.name == "math": # (inline math)
  821. i.name = "span"
  822. else: # i.name == "displaymath" (display math)
  823. i.name = "div"
  824. i.attrs["style"] = "text-align:center;padding-bottom:10px;"
  825. i["class"].append("cs_displaymath")
  826. i["class"].append("cs_math_to_render")
  827. return tree