# This file is part of CAT-SOOP
# Copyright (c) 2011-2020 by The CAT-SOOP Developers <catsoop-dev@mit.edu>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
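
# CAT-SOOP (https://catsoop.mit.edu) is a flexible, programmable learning
# management system written in Python.  This script is its checker: it pulls
# queued submissions off the on-disk checker queue, runs each question's
# handler in a separate process, and writes the results back to the checker's
# results directory.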

import os
import sys
import time
import shutil
import signal
import logging
import tempfile
import traceback
import collections
import multiprocessing

from datetime import datetime

CATSOOP_LOC = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if CATSOOP_LOC not in sys.path:
    sys.path.append(CATSOOP_LOC)

import catsoop.base_context as base_context
import catsoop.lti as lti
import catsoop.auth as auth
import catsoop.cslog as cslog
import catsoop.loader as loader
import catsoop.language as language
import catsoop.dispatch as dispatch

from catsoop.process import set_pdeathsig
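
# On-disk layout of the checker queue (everything lives under
# <cs_data_root>/_logs/_checker): new submissions wait in "queued", are moved
# to "running" while a worker is checking them, and finished results are
# staged in "staging" before being moved into "results".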
CHECKER_DB_LOC = os.path.join(base_context.cs_data_root, "_logs", "_checker")
RUNNING = os.path.join(CHECKER_DB_LOC, "running")
QUEUED = os.path.join(CHECKER_DB_LOC, "queued")
RESULTS = os.path.join(CHECKER_DB_LOC, "results")
STAGING = os.path.join(CHECKER_DB_LOC, "staging")

REAL_TIMEOUT = base_context.cs_checker_global_timeout

DEBUG = True

LOGGER = logging.getLogger("cs")


def log(msg):
    if not DEBUG:
        return
    dt = datetime.now()
    omsg = "[checker:%s]: %s" % (dt, msg)
    LOGGER.info(omsg)


def exc_message(context):
    exc = traceback.format_exc()
    exc = context["csm_errors"].clear_info(context, exc)
    return ('<p><font color="red"><b>CAT-SOOP ERROR:</b><pre>%s</pre></font>') % exc


def do_check(row):
    """
    Check submission, dispatching to appropriate question handler

    row: (dict) action to take, with input data
    """
    os.setpgrp()  # make this part of its own process group
    set_pdeathsig()()  # but make it die if the parent dies.  will this work?

    context = loader.generate_context(row["path"])
    context["cs_course"] = row["path"][0]
    context["cs_path_info"] = row["path"]
    context["cs_username"] = row["username"]
    context["cs_user_info"] = {"username": row["username"]}
    context["cs_user_info"] = auth.get_user_information(context)
    context["cs_now"] = datetime.fromtimestamp(row["time"])

    have_lti = ("cs_lti_config" in context) and ("lti_data" in row)
    if have_lti:
        lti_data = row["lti_data"]
        lti_handler = lti.lti4cs_response(
            context, lti_data
        )  # LTI response handler, from row['lti_data']
        log("lti_handler.have_data=%s" % lti_handler.have_data)
        if lti_handler.have_data:
            log("lti_data=%s" % lti_handler.lti_data)
            if "cs_session_data" not in context:
                context["cs_session_data"] = {}
            # so that the course preload.py knows this is an LTI user
            context["cs_session_data"]["is_lti_user"] = True

    cfile = dispatch.content_file_location(context, row["path"])
    log(
        "Loading grader python code course=%s, cfile=%s" % (context["cs_course"], cfile)
    )
    loader.load_content(
        context, context["cs_course"], context["cs_path_info"], context, cfile
    )

    namemap = collections.OrderedDict()
    cnt = 0
    total_possible_npoints = 0
    for elt in context["cs_problem_spec"]:
        if isinstance(elt, tuple):  # each elt is (problem_context, problem_kwargs)
            m = elt[1]
            namemap[m["csq_name"]] = elt
            csq_npoints = m.get("csq_npoints", 0)
            # used to compute the total aggregate score percentage
            total_possible_npoints += csq_npoints
            if DEBUG:
                question = elt[0]["handle_submission"]
                dn = m.get("csq_display_name")
                log("Map: %s (%s) -> %s" % (m["csq_name"], dn, question))
                log(
                    "%s csq_npoints=%s, total_points=%s"
                    % (dn, csq_npoints, elt[0]["total_points"]())
                )
            cnt += 1
    if DEBUG:
        log(
            "Loaded %d procedures into question namemap (total_possible_npoints=%s)"
            % (cnt, total_possible_npoints)
        )

    # now, depending on the action we want, take the appropriate steps

    names_done = set()
    for name in row["names"]:
        # form fields named '__<qname>_<suffix>' refer to the question '<qname>'
        if name.startswith("__"):
            name = name[2:].rsplit("_", 1)[0]
        if name in names_done:
            continue
        names_done.add(name)
        question, args = namemap[name]
        if row["action"] == "submit":
            if DEBUG:
                log("submit name=%s, row=%s" % (name, row))
            try:
                handler = question["handle_submission"]
                if DEBUG:
                    log("handler=%s" % handler)
                resp = handler(row["form"], **args)
                score = resp["score"]
                msg = resp["msg"]
                extra = resp.get("extra_data", None)
            except Exception as err:
                resp = {}
                score = 0.0
                log("Failed to handle submission, err=%s" % str(err))
                log("Traceback=%s" % traceback.format_exc())
                msg = exc_message(context)
                extra = None
            if DEBUG:
                log("submit resp=%s, msg=%s" % (resp, msg))

            score_box = context["csm_tutor"].make_score_display(
                context, args, name, score, True
            )

        elif row["action"] == "check":
            try:
                msg = question["handle_check"](row["form"], **args)
            except:
                msg = exc_message(context)
            score = None
            score_box = ""
            extra = None
            if DEBUG:
                log("check name=%s, msg=%s" % (name, msg))

        row["score"] = score
        row["score_box"] = score_box
        row["response"] = language.handle_custom_tags(context, msg)
        row["extra_data"] = extra

        # make temporary file to write results to
        magic = row["magic"]
        temploc = os.path.join(STAGING, "results.%s" % magic)
        with open(temploc, "wb") as f:
            f.write(context["csm_cslog"].prep(row))

        # move that file into the results directory (the with block above has
        # already closed the file handle)
        newloc = os.path.join(RESULTS, magic[0], magic[1], magic)
        os.makedirs(os.path.dirname(newloc), exist_ok=True)
        shutil.move(temploc, newloc)

        # then remove from running
        os.unlink(os.path.join(RUNNING, row["magic"]))

        # finally, update the appropriate log
        cm = context["csm_cslog"].log_lock(
            [row["username"], *row["path"], "problemstate"]
        )
        with cm as lock:
            x = context["csm_cslog"].most_recent(
                row["username"], row["path"], "problemstate", {}, lock=False
            )

            if row["action"] == "submit":
                x.setdefault("scores", {})[name] = row["score"]

            x.setdefault("score_displays", {})[name] = row["score_box"]
            x.setdefault("cached_responses", {})[name] = row["response"]
            x.setdefault("extra_data", {})[name] = row["extra_data"]

            context["csm_cslog"].overwrite_log(
                row["username"], row["path"], "problemstate", x, lock=False
            )

            # update LTI tool consumer with new aggregate score
            if have_lti and lti_handler.have_data and row["action"] == "submit":
                aggregate_score = 0
                cnt = 0
                try:
                    # e.g. 'scores': {'q000000': 1.0, 'q000001': True, 'q000002': 1.0}
                    for k, v in x["scores"].items():
                        aggregate_score += float(v)
                        cnt += 1
                    if total_possible_npoints == 0:
                        total_possible_npoints = 1.0
                        LOGGER.error("[checker] total_possible_npoints=0 ????")
                    aggregate_score_fract = (
                        aggregate_score * 1.0 / total_possible_npoints
                    )  # LTI wants score in [0, 1.0]
                    log(
                        "Computed aggregate score from %d questions, aggregate_score=%s (fraction=%s)"
                        % (cnt, aggregate_score, aggregate_score_fract)
                    )
                    log(
                        "magic=%s sending aggregate_score_fract=%s to LTI tool consumer"
                        % (row["magic"], aggregate_score_fract)
                    )
                    score_ok = True
                except Exception as err:
                    LOGGER.error(
                        "[checker] failed to compute score for problem %s, err=%s"
                        % (x, err)
                    )
                    score_ok = False

                if score_ok:
                    try:
                        lti_handler.send_outcome(aggregate_score_fract)
                    except Exception as err:
                        LOGGER.error(
                            "[checker] failed to send outcome to LTI consumer, err=%s"
                            % str(err)
                        )
                        LOGGER.error("[checker] traceback=%s" % traceback.format_exc())
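

# Main loop: re-queue anything left over in "running" from a previous run,
# then poll the queue forever, starting one do_check() worker process per
# queued submission (up to cs_checker_parallel_checks at a time) and killing
# any worker that runs longer than REAL_TIMEOUT.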
running = []

# if anything is in the "running" dir when we start, that's an error.  turn
# those back to queued to force them to run again (put them at the front of the
# queue).
for f in os.listdir(RUNNING):
    shutil.move(os.path.join(RUNNING, f), os.path.join(QUEUED, "0_%s" % f))

# and now actually start running
if DEBUG:
    log("starting main loop")
nrunning = None

while True:
    # check for dead processes
    dead = set()
    # output a debug message when the number of running processes changes
    if DEBUG and len(running) != nrunning:
        nrunning = len(running)
        log("have %d running (%s)" % (nrunning, running))
    for i in range(len(running)):
        id_, row, p = running[i]
        if not p.is_alive():
            log("    Process %s is dead" % p)
            if p.exitcode != 0:
                row["score"] = 0.0
                row["score_box"] = ""
                if p.exitcode < 0:  # this probably only happens if we killed it
                    row["response"] = (
                        "<font color='red'><b>Your submission could not be checked "
                        "because the checker ran for too long.</b></font>"
                    )
                else:  # a python error or similar
                    row["response"] = (
                        "<font color='red'><b>An unknown error occurred when "
                        "processing your submission</b></font>"
                    )
                magic = row["magic"]
                newloc = os.path.join(RESULTS, magic[0], magic[1], magic)
                # the worker may have died before it created the results
                # directory, so make sure the directory exists before writing
                os.makedirs(os.path.dirname(newloc), exist_ok=True)
                with open(newloc, "wb") as f:
                    f.write(cslog.prep(row))
                # then remove from running
                os.unlink(os.path.join(RUNNING, row["magic"]))
            dead.add(i)
        elif time.time() - p._started > REAL_TIMEOUT:
            try:
                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
            except:
                pass
    if dead:
        log("Removing %s" % dead)
    for i in sorted(dead, reverse=True):
        running.pop(i)

    if base_context.cs_checker_parallel_checks - len(running) > 0:
        # otherwise, add an entry to running.
        waiting = sorted(os.listdir(QUEUED))
        if waiting:
            # grab the first thing off the queue, move it to the "running" dir
            first = waiting[0]
            qfn = os.path.join(QUEUED, first)
            with open(qfn, "rb") as f:
                try:
                    row = cslog.unprep(f.read())
                except Exception as err:
                    LOGGER.error(
                        "[checker] failed to read queue log file %s, error=%s, traceback=%s"
                        % (qfn, err, traceback.format_exc())
                    )
                    continue
            _, magic = first.split("_")
            row["magic"] = magic
            shutil.move(os.path.join(QUEUED, first), os.path.join(RUNNING, magic))
            log("Moving from queued to running: %s " % first)

            # start a worker for it
            log("Starting checker with row=%s" % row)
            p = multiprocessing.Process(target=do_check, args=(row,))
            running.append((magic, row, p))
            p.start()
            p._started = time.time()
            p._entry = row
            log("Process pid = %s" % p.pid)

    time.sleep(0.1)