CAT-SOOP is a flexible, programmable learning management system based on the Python programming language. https://catsoop.mit.edu
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

603 lines
21 KiB

  1. # This file is part of CAT-SOOP
  2. # Copyright (c) 2011-2020 by The CAT-SOOP Developers <catsoop-dev@mit.edu>
  3. #
  4. # This program is free software: you can redistribute it and/or modify it under
  5. # the terms of the GNU Affero General Public License as published by the Free
  6. # Software Foundation, either version 3 of the License, or (at your option) any
  7. # later version.
  8. #
  9. # This program is distributed in the hope that it will be useful, but WITHOUT
  10. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  12. # details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. """
  17. Functions for loading page specifications into dictionaries
  18. This file contains functions that do a lot of the "heavy lifting" associated
  19. with loading pages, including handling preload, managing parsing/evaluation of
  20. code in content files, and evaluation of plugins.
  21. """
  22. import os
  23. import re
  24. import sys
  25. import shutil
  26. import random
  27. import importlib
  28. import traceback
  29. from collections import OrderedDict
  30. from . import time
  31. from . import cslog
  32. from . import language
  33. from . import debug_log
  34. from . import base_context
  35. importlib.reload(base_context)
  36. def get_file_data(context, form, name):
  37. """
  38. Load the contents of a submission to the question with the given name in
  39. the given form, taking file upload preferences into account.
  40. Depending on the value of `cs_upload_management`, the data for a file might
  41. be stored directly on disk, or as part of a CAT-SOOP log. This function
  42. grabs the associated data as a bytestring.
  43. **Parameters:**
  44. * `context`: the context associated with this request
  45. * `form`: a dictionary mapping names to values, as in `cs_form`
  46. * `name`: the name of the question whose data we should grab
  47. **Returns:** a bytestring containing the data
  48. """
  49. data = form[name]
  50. up = context["cs_upload_management"]
  51. if isinstance(data, list):
  52. if up == "file":
  53. path = os.path.join(
  54. context["cs_data_root"], "_logs", "_uploads", data[1], "content"
  55. )
  56. with open(path, "rb") as f:
  57. data = f.read()
  58. return cslog.decompress_decrypt(data)
  59. elif up == "db":
  60. return context["csm_thirdparty"].data_uri.DataURI(data[1]).data
  61. else:
  62. raise Exception("unknown upload management style: %r" % up)
  63. elif isinstance(data, str):
  64. return data.encode()
  65. else: # bytes
  66. return data
  67. def clean_builtins(d):
  68. """
  69. Removes the `'__builtins__'` key from a dictionary to make it serializable
  70. **Parameters:**
  71. * `d`: the dictionary to clean
  72. **Returns:** `None`
  73. """
  74. try:
  75. del d["__builtins__"]
  76. except:
  77. pass
  78. def plugin_locations(context, course):
  79. """
  80. Look up the directories from which plugins should be loaded
  81. **Parameters:**
  82. * `context`: the context associated with this request
  83. * `course`: the course from which plugins should be loaded (or `None` if no
  84. course).
  85. **Returns:** a list of directories from which plugins should be loaded.
  86. """
  87. out = [
  88. os.path.join(context.get("cs_data_root", base_context.cs_data_root), "plugins")
  89. ]
  90. if course is not None:
  91. out.append(
  92. os.path.join(
  93. context.get("cs_data_root", base_context.cs_data_root),
  94. "courses",
  95. course,
  96. "__PLUGINS__",
  97. )
  98. )
  99. return out
  100. def available_plugins(context, course):
  101. """
  102. Determine all the plugins that can be loaded
  103. **Parameters:**
  104. * `context`: the context associated with this request
  105. * `course`: the course from which plugins should be loaded (or `None` if no
  106. course).
  107. **Returns:** a list of the full paths to all available plugins' directories
  108. """
  109. out = []
  110. for loc in plugin_locations(context, course):
  111. try:
  112. p = list(sorted(os.listdir(loc)))
  113. except:
  114. p = []
  115. for i in p:
  116. fullname = os.path.join(loc, i)
  117. if os.path.isdir(fullname):
  118. out.append(fullname)
  119. return out
  120. def get_plugin_code_file(plugin, type_):
  121. """
  122. Return the filename of a particular hook from the given plugin
  123. **Parameters:**
  124. * `plugin`: a string containing the name of a directory containing a plugin
  125. * `type_`: the name of a plugin hook as a string (e.g., `'post_load'`)
  126. **Returns:** a string containing the full path to the given hook for the
  127. given plugin if it exists, or `None` otherwise
  128. """
  129. full_fname = os.path.join(plugin, "%s.py" % type_)
  130. if os.path.isfile(full_fname):
  131. return full_fname
  132. return None
  133. def run_plugins(context, course, type_, into):
  134. """
  135. Run the given hook for all plugins
  136. **Parameters:**
  137. * `context`: the context associated with this request
  138. * `course`: the course from which plugins should be loaded (or `None` if no
  139. course).
  140. * `type_`: the name of a plugin hook as a string (e.g., `'post_load'`)
  141. * `into`: the context in which the plugins should be run
  142. **Returns:** `None`
  143. """
  144. plugins = available_plugins(context, course)
  145. for p in plugins:
  146. codefile = get_plugin_code_file(p, type_)
  147. if codefile is None:
  148. continue
  149. exec(cs_compile(codefile), into)
  150. def load_global_data(into, check_values=True):
  151. """
  152. Load global data into the specified dictionary
  153. Includes anything specified in `base_context.py` and `config.py`, as well
  154. as all of the modules in the catsoop directory.
  155. **Parameters:**
  156. * `into`: a dictionary into which the built-in values should be loaded
  157. **Optional Parameters:**
  158. * `check_values` (default `True`): whether to error on invalid
  159. configuration values
  160. **Returns:** `None` on success, or a string containing an error message on
  161. failure
  162. """
  163. into["cs_time"] = time.now()
  164. into["cs_timestamp"] = time.detailed_timestamp(into["cs_time"])
  165. if check_values and len(base_context._cs_config_errors) > 0:
  166. m = "ERROR while loading global CAT-SOOP configuration:\n\n" + "\n".join(
  167. base_context._cs_config_errors
  168. )
  169. debug_log.LOGGER.error(m)
  170. return m
  171. try:
  172. thisdir = os.path.dirname(__file__)
  173. sys.path.insert(0, thisdir)
  174. into["sys"] = sys
  175. fname = os.path.join(thisdir, "base_context.py")
  176. into["__file__"] = fname
  177. with open(fname) as f:
  178. t = f.read()
  179. t = '__name__ = "catsoop.base_context"\n' + t
  180. c = compile(t, fname, "exec")
  181. exec(c, into)
  182. into["cs_random"] = random.Random()
  183. into["csm_base_context"] = into["base_context"] = base_context
  184. clean_builtins(into)
  185. into["csm_loader"] = sys.modules[__name__]
  186. debug_log.setup_logging(into) # setup global log levels
  187. into["cs_debug_logger"] = debug_log.LOGGER
  188. except Exception as e:
  189. debug_log.LOGGER.error(
  190. "Exception encountered when trying to load global context: %s" % str(e)
  191. )
  192. debug_log.LOGGER.error("traceback: %s" % traceback.format_exc())
  193. return traceback.format_exc(e)
  194. def get_course_fs_location(context, course, join=True):
  195. """
  196. Returns the base location of the specified course on disk, including
  197. "special" courses (`_util`, `_qtype`, etc).
  198. **Parameters:**
  199. * `context`: the context associated with this request
  200. * `course`: the name of the course
  201. **Optional Parameters:**
  202. * `join` (default `True`): controls the return type. If `True`, the
  203. elements in the path will be joined together and the return value will
  204. be a string. If `False`, the return value will be a list of directory
  205. names.
  206. **Returns:** depends on the value of `join` (see above).
  207. """
  208. fs_root = context.get("cs_fs_root", base_context.cs_fs_root)
  209. data_root = context.get("cs_data_root", base_context.cs_data_root)
  210. if course == "_util":
  211. rtn = [fs_root, "__UTIL__"]
  212. elif course == "_qtype":
  213. rtn = [fs_root, "__QTYPES__"]
  214. elif course == "_auth":
  215. rtn = [fs_root, "__AUTH__"]
  216. elif course == "_plugin":
  217. rtn = [data_root, "plugins"]
  218. else:
  219. rtn = [data_root, "courses", course]
  220. if join:
  221. return os.path.join(*rtn)
  222. return rtn
  223. def generate_context(path):
  224. """
  225. Generate a new context, loading the global data and running the
  226. `preload.py` files for the specified path.
  227. This function is particularly useful in scripts, as many of the functions
  228. in CAT-SOOP require a "context" in which to run.
  229. **Parameters:**
  230. * `path`: a list of strings (starting with a course name) representing the
  231. path whose preload context should be spoofed
  232. **Returns:** a context dictionary containing the global values and those
  233. defined in the `preload.py` files along the specified path
  234. """
  235. ctx = {}
  236. load_global_data(ctx)
  237. ctx["cs_path_info"] = path
  238. if path:
  239. ctx["cs_course"] = path[0]
  240. cfile = ctx["csm_dispatch"].content_file_location(ctx, path)
  241. do_preload(ctx, ctx["cs_course"], path[1:], ctx, cfile)
  242. return ctx
  243. def _make_file_importer(base_dir):
  244. def _import_from_file(filename, name=None):
  245. filename = os.path.abspath(os.path.join(base_dir, filename))
  246. if name is None:
  247. name = os.path.basename(filename).rsplit(".", 1)[0].replace(".", "_")
  248. spec = importlib.util.spec_from_file_location(name, filename)
  249. module = importlib.util.module_from_spec(spec)
  250. spec.loader.exec_module(module)
  251. return module
  252. return _import_from_file
  253. def do_preload(context, course, path, into, content_file=None):
  254. """
  255. Load data from `preload.py` files in the appropriate directories for this
  256. request.
  257. The `preload.py` file from the course will be executed first, followed by
  258. the next level down the path, and so on until the file from this request's
  259. path has been run. The preload files will also be run from this page's
  260. children, though they will be executed into separate directories, and
  261. stored in the 'children' key of the supplied dictionary.
  262. This function is run before loading user data, so the code in `preload.py`
  263. cannot make use of user information, though it can make use of any
  264. variables specified in the base context or in preload files from higher up
  265. the tree.
  266. **Parameters:**
  267. * `context`: the context associated with this request
  268. * `course`: the course associated with this request
  269. * `path`: the path associated with this request, as a list of strings _not_
  270. including the course
  271. * `into`: the dictionary in which the code should be executed
  272. **Optional Parameters:**
  273. * `content_file` (default `None`): the name of the content file associated
  274. with this page load. We need to know this because the behavior is
  275. slightly different depending on whether the associated content file is
  276. indeed a `content.xx` file (in which case we can run a `preload.py` for
  277. _every element in the given path_) or whether it is an arbitrary file
  278. (in which case we cannot run a `preload.py` for the last element in the
  279. list).
  280. **Returns:** `None` on success, or the string `'missing'` on failure
  281. """
  282. into["cs_course"] = course
  283. directory = get_course_fs_location(context, course)
  284. if content_file is None:
  285. return "missing"
  286. breadcrumbs = []
  287. run_plugins(context, course, "pre_preload", into)
  288. if os.path.basename(content_file).rsplit(".", 1)[0] != "content":
  289. path = path[:-1]
  290. for ix, i in enumerate(path):
  291. new_name = os.path.join(directory, "preload.py")
  292. into["cs_local_python_import"] = _make_file_importer(directory)
  293. if os.path.isfile(new_name):
  294. exec(cs_compile(new_name), into)
  295. breadcrumbs.append(dict(into))
  296. try:
  297. newdir = get_directory_name(context, course, path[:ix], i)
  298. except FileNotFoundError:
  299. return "missing"
  300. if newdir is None:
  301. return "missing"
  302. directory = os.path.join(directory, newdir)
  303. new_name = os.path.join(directory, "preload.py")
  304. into["cs_local_python_import"] = _make_file_importer(directory)
  305. if os.path.isfile(new_name):
  306. exec(cs_compile(os.path.join(directory, "preload.py")), into)
  307. breadcrumbs.append(dict(into))
  308. into["cs_loader_states"] = breadcrumbs
  309. run_plugins(context, course, "pre_auth", into)
  310. _code_replacements = [
  311. ("tutor.question(", "tutor.question(globals(),"),
  312. ("tutor.qtype_inherit(", "tutor.qtype_inherit(globals(),"),
  313. ("tutor.init_random()", "tutor.init_random(globals())"),
  314. ]
  315. def _atomic_write(fname, contents):
  316. tname = fname + ".temp"
  317. with open(tname, "w") as f:
  318. f.write(contents)
  319. shutil.move(tname, fname)
  320. def cs_compile(fname, pre_code="", post_code=""):
  321. """
  322. Return a code object representing the code in the specified file, after
  323. making a few CAT-SOOP-specific modifications.
  324. As a side-effect, store on disk a file containing the updated code, and
  325. another containing information about how many new lines were added to the
  326. top of the given file, for use in error reporting. These pieces are only
  327. updated if the contents of the given file have changed (based on the
  328. modification time).
  329. **Parameters:**
  330. * `fname`: the name of the file to be compiled
  331. **Optional Parameters:**
  332. * `pre_code` (default `''`): a string containing code to be inserted at the
  333. start of the file
  334. * `post_code` (default `''`): a string containing code to be inserted at the
  335. end of the file
  336. **Returns:** a bytestring containing the compiled code
  337. """
  338. base_fname = fname.rsplit(".", 1)[0]
  339. fdirs = os.path.dirname(fname).split(os.sep)
  340. if fdirs and fdirs[0] == "":
  341. fdirs.pop(0)
  342. cname = ".".join([os.path.basename(base_fname), "py"])
  343. cdir = os.path.join(base_context.cs_data_root, "_cached", *fdirs)
  344. os.makedirs(cdir, exist_ok=True)
  345. cname = os.path.join(cdir, cname)
  346. with open(fname) as _f:
  347. real_code = _f.read()
  348. code = "\n\n".join([pre_code, real_code, post_code])
  349. for i, j in _code_replacements:
  350. code = code.replace(i, j)
  351. try:
  352. # this is a 'try' block instead of a straight conditional to account
  353. # for cases where, e.g., cname doesn't exist.
  354. assert os.stat(cname).st_mtime > os.stat(fname).st_mtime
  355. except:
  356. _atomic_write(cname, code)
  357. _atomic_write(cname + ".line_offset", str(len(pre_code) + 2))
  358. return compile(code, cname, "exec")
  359. def get_directory_name(context, course, path, name):
  360. """
  361. Return the actual name of a subdirectory of the given path (including
  362. sorting numbers) given the shortname of the resource it represents.
  363. Directories for pages can optionally begin with a series of digits and a
  364. period, in which case the name of the associated page is the piece
  365. following that period, and the numbers that come before it are used for
  366. sorting.
  367. **Parameters:**
  368. * `context`: the context associated with this request
  369. * `course`: the course associated with this request
  370. * `path`: the path associated with this request, as a list of strings _not_
  371. including the course
  372. * `name`: the name of the page being requested (a known child of `path`)
  373. **Returns:** the appropriate directory name if `name` is indeed a child of
  374. `path`, or `None` otherwise
  375. """
  376. s = get_subdirs(context, course, path)
  377. for i in s:
  378. if (i == name and not i.startswith("_") and not i.startswith(".")) or (
  379. "." in i and ".".join(i.split(".")[1:]) == name
  380. ):
  381. return i
  382. return None
  383. def get_subdirs(context, course, path):
  384. """
  385. Return all subdirectories of the given path that represent pages.
  386. **Parameters:**
  387. * `context`: the context associated with this request
  388. * `course`: the course associated with this request
  389. * `path`: the path associated with this request, as a list of strings _not_
  390. including the course
  391. **Returns:** a list of all directory names under `path` that represent
  392. pages.
  393. """
  394. path_pieces = get_course_fs_location(context, course, join=False)
  395. for ix, i in enumerate(path):
  396. d = get_directory_name(context, course, path[:ix], i)
  397. if d is None:
  398. return []
  399. path_pieces.append(d)
  400. directory = os.path.join(*path_pieces)
  401. return [
  402. i
  403. for i in os.listdir(directory)
  404. if os.path.isdir(os.path.join(directory, i))
  405. and re.match(r"[^_\.].*", i) is not None
  406. ]
  407. _py_custom_print = """
  408. cs_problem_spec = []
  409. oprint = print
  410. def newprint(*args, **kwargs):
  411. if 'file' in kwargs:
  412. oprint(*args, **kwargs)
  413. cs_problem_spec.append(' '.join(str(i) for i in args))
  414. print = newprint
  415. """
  416. def load_content(context, course, path, into, content_file=None):
  417. """
  418. Load data from the Python file specified by the content file in the
  419. appropriate directory for this request.
  420. This function is run after loading user data, so the code in the content
  421. file can make use of that information, which includes user permissions.
  422. This function also populates the `cs_children` variable by executing the
  423. `preload.py` files of this page's children into the given context.
  424. **Parameters:**
  425. * `context`: the context associated with this request
  426. * `course`: the course associated with this request
  427. * `path`: the path associated with this request, as a list of strings _not_
  428. including the course
  429. * `into`: the dictionary in which the code should be executed
  430. **Optional Parameters:**
  431. * `content_file` (default `None`): the name of the content file associated
  432. with this page load. We need to know this because the behavior is
  433. slightly different depending on whether the associated content file is
  434. indeed a `content.xx` file or whether it is an arbitrary file.
  435. **Returns:** `None`
  436. """
  437. run_plugins(context, course, "post_auth", into)
  438. directory = os.path.dirname(content_file)
  439. if os.path.basename(content_file).rsplit(".", 1)[0] == "content":
  440. subdirs = get_subdirs(context, course, path)
  441. shortnames = [
  442. (".".join(i.split(".")[1:]) if re.match(r"\d*\..*", i) else i)
  443. for i in subdirs
  444. ]
  445. children = dict([(i, dict(into)) for i in shortnames])
  446. for d, name in zip(subdirs, shortnames):
  447. new_name = os.path.join(directory, d, "preload.py")
  448. into["cs_local_python_import"] = _make_file_importer(d)
  449. if os.path.isfile(new_name):
  450. exec(cs_compile(new_name), children[name])
  451. children[name]["directory"] = d
  452. into["cs_children"] = children
  453. else:
  454. into["cs_children"] = {}
  455. into["cs_source_format"] = content_file.rsplit(".", 1)[-1]
  456. with open(content_file) as f:
  457. into["cs_content"] = f.read()
  458. into["cs_local_python_import"] = _make_file_importer(directory)
  459. if into["cs_source_format"] != "py":
  460. into["cs_content"] = language.handle_includes(into, into["cs_content"])
  461. into["cs_content"] = language.handle_python_tags(into, into["cs_content"])
  462. else:
  463. exec(_py_custom_print, context)
  464. exec(context["cs_content"], context)
  465. exec("print = oprint", context)
  466. if "cs_post_load" in into:
  467. into["cs_post_load"](into)
  468. run_plugins(context, course, "post_load", into)
  469. language.source_formats[into["cs_source_format"]](into)
  470. if "cs_pre_handle" in into:
  471. into["cs_pre_handle"](into)
  472. run_plugins(context, course, "pre_handle", into)
  473. last_mod = os.stat(content_file).st_mtime
  474. cache = into["csm_cslog"].most_recent(
  475. "_question_info", [course] + path, "question_info", None
  476. )
  477. if (
  478. course not in {None, "_util"}
  479. and (cache is None or last_mod > cache["timestamp"])
  480. and "cs_problem_spec" in into
  481. ):
  482. qs = OrderedDict()
  483. for i in into["cs_problem_spec"]:
  484. if isinstance(i, tuple):
  485. x = qs[i[1]["csq_name"]] = {}
  486. x["csq_npoints"] = i[0]["total_points"](**i[1])
  487. x["csq_name"] = i[1]["csq_name"]
  488. x["csq_display_name"] = i[1].get("csq_display_name", x["csq_name"])
  489. x["qtype"] = i[0]["qtype"]
  490. x["csq_grading_mode"] = i[1].get("csq_grading_mode", "auto")
  491. into["csm_cslog"].overwrite_log(
  492. "_question_info",
  493. [course] + path,
  494. "question_info",
  495. {"timestamp": last_mod, "questions": qs},
  496. )