UESTC离散数学自动答题脚本

Denvo 树犹如此,人何以堪

前言

离散课程要求需要在学堂在线这一网站中刷课及回答完相应的题目。由于网站支持自动连播以及倍速,那么问题主要在于回答相应的题目。我写了一个适用于Tampermonkey(油猴、篡改猴)的脚本,现拿出来给大家分享一下,以及简单阐述一下一些过程。

安装脚本

总体步骤可根据我之前写的军事理论课刷课脚本中的前三步来操作,但是需要粘贴的脚本为下面贴出来的这个,而不是军事理论课那篇文章中的脚本。

离散数学答题脚本
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
// ==UserScript==
// @name UESTC学堂在线离散数学刷题脚本
// @namespace https://www.denvoshome.xyz/posts/UESTC_discrete_math_script/
// @version 1.0
// @description 当进入答题时,点击开始即可自动答题,只会从当前显示的题目开始往后做,不会检查前面的题目是否已完成
// @author Denvo
// @match https://www.xuetangx.com/learn/space/EST07011001151/EST07011001151/29593250/exercise/*
// @grant GM_registerMenuCommand
// @grant GM_unregisterMenuCommand
// @grant GM_setValue
// @grant GM_getValue
// ==/UserScript==

(function () {
'use strict';

// Exercise ids (the last path segment of each exercise page URL), in the order
// the script works through them. answering() labels indices 0-12 as chapters
// 1-13 and the remaining (last) entry as the final exam.
const urls = [
"75336320", "75336325", "75336363", "75336409", "75336419", "75336472", "75336504",
"75336518", "75336560", "75336582", "75336615", "75336673", "75336685", "75336623"
];

// Answer table used by answering(): outer key = exercise id (see `urls`),
// inner key = question number as a string, value = the answer.
// For choice questions the value is the option letter(s) to click, one click
// per letter; for true/false questions a value containing "T" selects the
// "true" option and anything else selects the other option.
const answers = {
"75336320": {
"1": "D",
"2": "D",
"3": "B",
"4": "B",
"5": "B",
"6": "ABD",
"7": "EC",
"8": "BD",
"9": "ABE",
"10": "F"
},
"75336325": {
"1": "B",
"2": "C",
"3": "A",
"4": "A",
"5": "B",
"6": "D"
},
"75336363": {
"1": "B",
"2": "A",
"3": "C",
"4": "D",
"5": "D",
"6": "C",
"7": "B",
"8": "B",
"9": "B",
"10": "D",
"11": "ACE",
"12": "AB",
"13": "BC",
"14": "ADC",
"15": "AED",
"16": "F",
"17": "F",
"18": "T",
"19": "F",
"20": "F"
},
"75336409": {
"1": "B",
"2": "A",
"3": "B",
"4": "D",
"5": "D",
"6": "C",
"7": "C",
"8": "D",
"9": "C",
"10": "A",
"11": "DC",
"12": "BDE",
"13": "ABCD",
"14": "AC",
"15": "BCE",
"16": "F",
"17": "T",
"18": "F",
"19": "F",
"20": "F"
},
"75336419": {
"1": "T",
"2": "F",
"3": "F",
"4": "T",
"5": "T"
},
"75336472": {
"1": "D",
"2": "C",
"3": "C",
"4": "A",
"5": "D",
"6": "C",
"7": "A",
"8": "C",
"9": "C",
"10": "A",
"11": "BCDE",
"12": "BC",
"13": "ABCE",
"14": "BCDE",
"15": "AE",
"16": "F",
"17": "T",
"18": "T",
"19": "F",
"20": "F"
},
"75336504": {
"1": "C",
"2": "D",
"3": "B",
"4": "B",
"5": "C",
"6": "D",
"7": "A",
"8": "C",
"9": "D",
"10": "C",
"11": "ABE",
"12": "ADE",
"13": "ABC",
"14": "AB",
"15": "CED",
"16": "F",
"17": "F",
"18": "T",
"19": "F",
"20": "F"
},
"75336518": {
"1": "C",
"2": "C",
"3": "B",
"4": "C",
"5": "A"
},
"75336560": {
"1": "B",
"2": "B",
"3": "D",
"4": "C",
"5": "A",
"6": "C",
"7": "B",
"8": "A",
"9": "B",
"10": "C",
"11": "AC",
"12": "AD",
"13": "ACE",
"14": "BCD",
"15": "ACE",
"16": "T",
"17": "T",
"18": "F",
"19": "T",
"20": "F"
},
"75336582": {
"1": "B",
"2": "A",
"3": "A",
"4": "B",
"5": "D",
"6": "B",
"7": "B",
"8": "C",
"9": "AD",
"10": "BDE",
"11": "ABCDE",
"12": "AC",
"13": "F",
"14": "F",
"15": "T",
"16": "T"
},
"75336615": {
"1": "C",
"2": "B",
"3": "A",
"4": "C",
"5": "C",
"6": "D",
"7": "B",
"8": "A",
"9": "D",
"10": "B",
"11": "BCD",
"12": "CD",
"13": "ACD",
"14": "ABEC",
"15": "ABCDE",
"16": "F",
"17": "T",
"18": "F",
"19": "F",
"20": "T"
},
// This id is the last entry of `urls`, i.e. the one answering() reports as the
// final exam; it is listed here before the remaining two chapter exercises.
"75336623": {
"1": "C",
"2": "B",
"3": "D",
"4": "B",
"5": "B",
"6": "D",
"7": "B",
"8": "D",
"9": "D",
"10": "C",
"11": "C",
"12": "C",
"13": "B",
"14": "C",
"15": "C",
"16": "B",
"17": "D",
"18": "D",
"19": "C",
"20": "B",
"21": "C",
"22": "C",
"23": "C",
"24": "A",
"25": "C",
"26": "C",
"27": "B",
"28": "A",
"29": "B",
"30": "C",
"31": "D",
"32": "B",
"33": "C",
"34": "C",
"35": "B",
"36": "B",
"37": "B",
"38": "C",
"39": "A",
"40": "A",
"41": "C",
"42": "C",
"43": "B",
"44": "A",
"45": "B",
"46": "D",
"47": "B",
"48": "B",
"49": "A",
"50": "A",
"51": "D",
"52": "D",
"53": "B",
"54": "D",
"55": "D",
"56": "B",
"57": "B",
"58": "D",
"59": "B",
"60": "D",
"61": "ADE",
"62": "ADE",
"63": "BCD",
"64": "CD",
"65": "AE",
"66": "BCE",
"67": "AC",
"68": "ABCE",
"69": "CE",
"70": "ABD",
"71": "BDE",
"72": "ABC",
"73": "ABC",
"74": "ACD",
"75": "BCE",
"76": "CE",
"77": "ABDE",
"78": "ACD",
"79": "BE",
"80": "ABCDE",
"81": "T",
"82": "T",
"83": "F",
"84": "F",
"85": "F",
"86": "T",
"87": "F",
"88": "F",
"89": "F",
"90": "T",
"91": "F",
"92": "F",
"93": "F",
"94": "F",
"95": "F",
"96": "T",
"97": "T",
"98": "F",
"99": "F",
"100": "F"
},
"75336673": {
"1": "D",
"2": "B",
"3": "ABCD",
"4": "F",
"5": "F"
},
"75336685": {
"1": "B",
"2": "D",
"3": "BDE",
"4": "T",
"5": "F"
}
}

// On/off flag for the script, read from the userscript value store under the
// key 'isScriptEnabled' (false when nothing has been stored yet).
let isScriptEnabled = GM_getValue('isScriptEnabled', false);
// Value returned by GM_registerMenuCommand for the currently registered menu
// entry; null until updateButton() has registered one.
let toggleButton = null;

/**
 * Pause for a while: returns a promise that resolves after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});

/**
 * Poll the DOM until an element matching the selector exists.
 * The result is a promise, so callers must `await` it.
 * @param {string} selector - CSS selector passed to document.querySelector.
 * @param {number} [timeout=30000] - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<Element>} The first element matching the selector.
 * @throws {Error} When no element matches before the timeout elapses; the
 *   message names the selector that was being waited for.
 */
async function waitElement(selector, timeout = 30000) {
  const start = Date.now();
  while (Date.now() - start < timeout) {
    const el = document.querySelector(selector);
    if (el) return el;
    // Yield control between polls so the browser can keep rendering the page.
    await sleep(200);
  }
  // Must be a template literal (backticks): with plain quotes the message
  // contained the literal text "${selector}" instead of the selector itself.
  throw new Error(`等待元素 ${selector} 超时`);
}

/**
 * Read the text of an element whose content is filled in later by page
 * scripts. The result is a promise, so callers must `await` it.
 *
 * While the text is "0" or blank it is re-read every 500 ms, at most 10
 * times; whatever the element contains afterwards is returned (so a value
 * that really is "0" is still returned once the retries are used up).
 * @param {string} selector - CSS selector of the element to read.
 * @returns {Promise<String>} The element's innerText.
 */
async function getRobustText(selector) {
  const target = await waitElement(selector);
  const looksUnfilled = () => target.innerText === "0" || target.innerText.trim() === "";

  for (let attempt = 0; looksUnfilled() && attempt < 10; attempt++) {
    await sleep(500);
  }
  return target.innerText;
}

/**
 * Main auto-answer routine for the exercise page that is currently open.
 *
 * Starting at the question currently displayed, it looks each answer up in
 * `answers`, clicks the matching option(s), submits, and moves on to the next
 * question. After the last question it navigates to the next exercise in
 * `urls`; after the last exercise it disables the script and notifies the user.
 * Questions before the one currently displayed are not revisited.
 *
 * Does nothing when the script is disabled or the page is not a known
 * exercise, and stops (without navigating away) when the question counters
 * cannot be read or an answer is missing from the table.
 *
 * @returns {Promise<void>}
 */
async function answering() {
  // Return immediately unless the user enabled the script from the menu.
  if (!isScriptEnabled) {
    console.log("当前脚本未启用");
    return;
  }
  console.log("脚本正在运行中...");

  // The exercise id is the last segment of the URL path.
  const pathParts = window.location.pathname.split('/');
  const currentId = pathParts[pathParts.length - 1];
  const currentIdIndex = urls.indexOf(currentId);

  if (currentIdIndex === -1) {
    console.error("未能成功读取页面,打开的页面是答题页面吗?");
    return;
  }

  // The last entry of `urls` is the final exam; every earlier one is a chapter.
  const isFinalExam = currentIdIndex === urls.length - 1;
  console.log("当前正在完成" + (isFinalExam ? "期末测试" : ("第" + (currentIdIndex + 1) + "章")));

  // Which question is displayed, and how many there are. The first character
  // of the total text is dropped before it is parsed.
  let currentQuestionIndex = Number.parseInt(await getRobustText('span.curent'), 10);
  const totalQuestions = Number.parseInt((await getRobustText('span.total')).substring(1), 10);

  // With a NaN counter the loop below would not run at all and the script
  // would silently jump to the next exercise, leaving this one unanswered.
  if (Number.isNaN(currentQuestionIndex) || Number.isNaN(totalQuestions)) {
    console.error("未能读取题号信息,已停止答题");
    return;
  }

  const chapterAnswers = answers[currentId];

  for (; currentQuestionIndex <= totalQuestions; currentQuestionIndex++) {

    // Honour a "stop" issued from the menu while the loop is running.
    if (!isScriptEnabled) return;

    console.log(`正在回答第[${currentQuestionIndex}/${totalQuestions}]题`);

    // A question that already shows the correct answer has been submitted before.
    if (document.body.innerText.includes('正确答案')) {
      console.log("本题已经作答!将跳过本题");
    } else {
      const currentAnswer = chapterAnswers?.[String(currentQuestionIndex)];
      // Stop instead of crashing on `undefined.includes` (or submitting an
      // empty selection) when the table has no entry for this question.
      if (typeof currentAnswer !== 'string') {
        console.error(`未找到第${currentQuestionIndex}题的答案,已停止答题`);
        return;
      }
      console.log(`本题答案:${currentAnswer}`);

      if (document.querySelectorAll('.answerList span.panduan').length === 2) {
        // True/false question: exactly two `panduan` options are present.
        document.querySelector(currentAnswer.includes('T') ? '.answerList span.panduan.true' : '.answerList span.panduan:not(.true)')?.click();
      } else {
        // Choice question: for every answer letter click the first option
        // whose text starts with that letter.
        const options = document.querySelectorAll('.answerList span');
        for (const char of currentAnswer) {
          for (const opt of options) {
            if (opt.innerText.trim().toUpperCase().startsWith(char)) {
              opt.click();
              await sleep(1000);
              break;
            }
          }
        }
      }

      // Submit the answer.
      const submitBtn = await waitElement('.btnCon button.btn');
      if (submitBtn && submitBtn.innerText.includes('提交')) {
        // Dispatch the whole pointer/mouse event chain: a plain DOM click()
        // has no effect on this button.
        const opts = { bubbles: true, cancelable: true, composed: true };
        submitBtn.dispatchEvent(new PointerEvent('pointerdown', opts));
        submitBtn.dispatchEvent(new MouseEvent('mousedown', opts));
        submitBtn.dispatchEvent(new PointerEvent('pointerup', opts));
        submitBtn.dispatchEvent(new MouseEvent('mouseup', opts));
        submitBtn.dispatchEvent(new MouseEvent('click', opts));

        await sleep(1500);
      }
    }

    // Go to the next question, unless this was the last one.
    if (currentQuestionIndex < totalQuestions) {
      const nextBtn = document.querySelector('i.iconfont.right.unselectable');
      if (nextBtn) {
        nextBtn.click();
        await sleep(1500);
      }
    }
  }

  // This exercise is finished: continue with the next one, or wrap up.
  console.log("本章测试已答完!");
  await sleep(1000);
  if (currentIdIndex < urls.length - 1) {
    await sleep(2000);
    location.href = "https://www.xuetangx.com/learn/space/EST07011001151/EST07011001151/29593250/exercise/" + urls[currentIdIndex + 1];
  } else {
    isScriptEnabled = false;
    GM_setValue('isScriptEnabled', isScriptEnabled);
    updateButton();
    alert("恭喜!所有章节已全部完成。");
  }
}

/**
 * Menu-command handler: flips the enabled flag, stores the new value under
 * 'isScriptEnabled', and refreshes the menu entry.
 * When the flag has just been switched on, the page is reloaded so that the
 * script starts running on the fresh page load.
 */
function toggleState() {
  isScriptEnabled = !isScriptEnabled;
  GM_setValue('isScriptEnabled', isScriptEnabled);
  updateButton();

  const action = isScriptEnabled ? "启动" : "停止";
  console.log("已点击" + action + "脚本");

  // Nothing more to do when the script was switched off.
  if (!isScriptEnabled) {
    return;
  }
  location.reload();
}

/**
 * Re-register the menu command so that its label matches the current state:
 * "停止脚本" while the script is enabled, "启动脚本" otherwise.
 * A previously registered entry is unregistered first.
 */
function updateButton() {
  const hasExistingEntry = toggleButton !== null;
  if (hasExistingEntry) {
    GM_unregisterMenuCommand(toggleButton);
  }

  let label;
  if (isScriptEnabled) {
    label = "停止脚本";
  } else {
    label = "启动脚本";
  }
  toggleButton = GM_registerMenuCommand(label, toggleState);
}

// Initialisation: register the menu entry right away, then start answering
// once the page has finished loading.
updateButton();
const startAfterRender = () => {
  // Wait another 2 s after load so the page can finish rendering first.
  setTimeout(answering, 2000);
};
if (document.readyState === 'complete') {
  // The load event has already fired (a userscript may be injected after it),
  // so a listener registered now would never be called.
  startAfterRender();
} else {
  window.addEventListener('load', startAfterRender, { once: true });
}
})();

脚本使用方法

先打开类似下图的答题页面:

答题页面
答题页面

此时打开Tampermonkey的菜单,里面会显示类似下图这样:

Tampermonkey菜单
Tampermonkey菜单

此时点击启动脚本,脚本就会开始发挥作用。此时不建议在此网页上操作任何内容!

脚本运行时,上面的启动脚本会被重命名为停止脚本。脚本停止后,会恢复为启动脚本

重要!!!启动脚本后,如果想要关闭脚本,一定要先点击“停止脚本”按钮!

当脚本启用时,脚本会读取当前显示的题目,然后自动选择对应的答案,并点击提交按钮,并切换到下一题自动回答。当本章测验结束后,将自动切换到下一章继续答题。

Playwright+AI获得答案的爬虫

上述脚本实际上是把答案都记在本地的,然后根据题目直接匹配相应的答案即可。而我最开始并没有答案,于是采用了Playwright框架,并接入AI来帮助我获取答案。不过我使用Java编写。以下是我使用到的第三方库:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
<dependencies>
    <!-- Browser automation framework -->
    <dependency>
        <groupId>com.microsoft.playwright</groupId>
        <artifactId>playwright</artifactId>
        <version>1.56.0</version>
    </dependency>
    <!-- Logging -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>2.0.17</version>
    </dependency>
    <dependency>
        <groupId>ch.qos.logback</groupId>
        <artifactId>logback-core</artifactId>
        <version>1.5.21</version>
    </dependency>
    <dependency>
        <groupId>ch.qos.logback</groupId>
        <artifactId>logback-classic</artifactId>
        <version>1.5.21</version>
    </dependency>
    <!-- JSON: the Java sources import com.alibaba.fastjson2.*, so depend on
         fastjson2 itself instead of the com.alibaba:fastjson compatibility
         artifact, which only brings fastjson2 in transitively -->
    <dependency>
        <groupId>com.alibaba.fastjson2</groupId>
        <artifactId>fastjson2</artifactId>
        <version>2.0.60</version>
    </dependency>
</dependencies>

一共写了三份源文件:Main.java、Ollama.java、Gemini.java。主要逻辑都写在了Main类里面:其中的getAns()方法用于在我已经提交答案之后,从头开始获取正确答案,而ai()方法用于调用AI帮我自动答题。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
package xyz.denvo;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONWriter;
import com.microsoft.playwright.*;
import com.microsoft.playwright.options.LoadState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Path;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Playwright driver for the xuetangx exercise pages.
 * <p>
 * {@link #getAns} walks through every exercise (after the answers have been submitted) and
 * collects the displayed correct answers as JSON; {@link #ai} answers the questions by sending a
 * screenshot of each one to an AI model and clicking the options it returns.
 */
public class Main {
    private static final Logger log = LoggerFactory.getLogger(Main.class);

    /** Common prefix of every exercise page URL; the exercise id is appended to it. */
    private static final String EXERCISE_BASE_URL =
            "https://www.xuetangx.com/learn/space/EST07011001151/EST07011001151/29593250/exercise/";

    /**
     * Single shared reader for console prompts. Creating a new Scanner on System.in for every
     * prompt can lose input that an earlier Scanner has already buffered.
     */
    private static final java.util.Scanner STDIN = new java.util.Scanner(System.in);

    /**
     * Launches a persistent Chromium context, waits for the user to confirm they are logged in,
     * then runs one of the two modes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        log.info("正在启动PlayWright...");

        // Sometimes a dialog (e.g. a hint about posting as a new user) pops up; its close button
        // has class="d-close" and clicking it dismisses the dialog.

        // Ids of the exercise pages, in processing order; the last one is the final exam.
        final String[] urls = {"75336320", "75336325", "75336363", "75336409", "75336419", "75336472", "75336504",
                "75336518", "75336560", "75336582", "75336615", "75336673", "75336685", "75336623"};

        // Main automation logic. The persistent context keeps user data in ./user_data.
        try (Playwright playwright = Playwright.create();
             BrowserContext context = playwright.chromium().launchPersistentContext(
                     Path.of("user_data"),
                     new BrowserType.LaunchPersistentContextOptions().setHeadless(false).setViewportSize(null)
                             .setArgs(List.of("--disable-blink-features=AutomationControlled"))
                             .setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36 Edg/147.0.0.0"))) {
            context.addInitScript(Path.of("stealth.min.js"));
            Page page = context.pages().get(0);
            log.info("已启动浏览器");
            page.navigate("https://www.xuetangx.com/");
            log.info("确定网页登陆后请按下回车继续执行");
            STDIN.nextLine();
            log.info("程序继续执行...");

            // Mode switch: swap which of the two calls is commented out.
            //ai(urls, page);
            getAns(urls, page);

        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            log.error("发生错误!", e);
        }
    }

    /** Human-readable name of the exercise at {@code index}: the last one is the final exam. */
    private static String describeExercise(int index, int total) {
        return index < total - 1 ? "第" + (index + 1) + "章" : "期末测试";
    }

    /** Blocks until the question counters and the "next" arrow of an exercise page are present. */
    private static void waitForExerciseReady(Page page) {
        page.waitForLoadState(LoadState.NETWORKIDLE);
        page.waitForSelector("span.curent");
        page.waitForSelector("span.total");
        page.waitForSelector("i.iconfont.right.unselectable");
    }

    /** Number of the question currently displayed. */
    private static int readCurrentIndex(Page page) {
        return Integer.parseInt(page.locator("span.curent").innerText().trim());
    }

    /** Total number of questions; the first character of the text is dropped before parsing. */
    private static int readTotalCount(Page page) {
        return Integer.parseInt(page.locator("span.total").innerText().trim().substring(1));
    }

    /**
     * Whether an AI reply can be acted on: one to five upper-case letters and not the
     * {@code NULL} marker the prompt asks the model to emit for unreadable images. An empty
     * string (what the AI clients return on errors) is rejected as well.
     */
    private static boolean isUsableAnswer(String result) {
        return result != null && !"NULL".equals(result) && result.matches("[A-Z]{1,5}");
    }

    /**
     * Collects the correct answers shown on every exercise page and logs them as JSON, keyed by
     * exercise id and question number. True/false answers are recorded as "T"/"F", choice answers
     * as the concatenated option letters.
     *
     * @param urls exercise ids, in processing order
     * @param page page to drive
     * @throws InterruptedException if interrupted while pausing between actions
     */
    private static void getAns(String[] urls, Page page) throws InterruptedException {
        // LinkedHashMap keeps exercises and questions in processing order in the printed JSON.
        Map<String, Object> jsonMap = new java.util.LinkedHashMap<>();
        try {
            for (int i = 0; i < urls.length; i++) {
                page.navigate(EXERCISE_BASE_URL + urls[i]);
                log.info("当前正在获取{}的答案", describeExercise(i, urls.length));

                waitForExerciseReady(page);
                int currentIndex = readCurrentIndex(page);
                int totalIndex = readTotalCount(page);

                // Page back to question 1 so that answers are collected from the start.
                for (; currentIndex > 1; currentIndex--) {
                    log.info("当前未翻到第一题!将翻到第一题");
                    page.locator(".tabbar i.iconfont:not(.right)").click();
                    Thread.sleep(1000);
                }

                Map<Integer, String> chapterMap = new java.util.LinkedHashMap<>();
                // Registered up front so a timeout midway still reports the answers gathered so far.
                jsonMap.put(urls[i], chapterMap);
                for (; currentIndex <= totalIndex; currentIndex++) {
                    log.info("当前正在处理第{}/{}题", currentIndex, totalIndex);
                    Thread.sleep(1000);
                    Locator correctSpans = page.locator("//p[contains(@class, 'myanswer') and contains(text(), '正确答案')]/following-sibling::span");
                    // getAttribute returns null when the span has no class attribute at all.
                    String firstClass = correctSpans.first().getAttribute("class");
                    String answer;
                    if (firstClass != null && firstClass.contains("panduan")) {
                        // True/false question
                        answer = firstClass.contains("true") ? "T" : "F";
                    } else {
                        // Choice question: gather the text of every span after the
                        // "correct answer" label and keep those that are a single capital letter,
                        // e.g. ["A", "B", "D"] becomes "ABD".
                        StringBuilder sb = new StringBuilder();
                        for (String t : correctSpans.allInnerTexts()) {
                            String cleanText = t.trim();
                            if (cleanText.matches("[A-Z]")) {
                                sb.append(cleanText);
                            }
                        }
                        answer = sb.toString();
                    }
                    log.info("此题答案:{}", answer);
                    chapterMap.put(currentIndex, answer);

                    // Turn the page, except after the last question.
                    if (currentIndex != totalIndex) {
                        page.locator("i.iconfont.right.unselectable").click();
                        Thread.sleep(1000);
                    }
                }
            }
        } catch (TimeoutError e) {
            log.error("等待超时", e);
        } finally {
            log.info("处理完成!");
            log.info("最终生成的json如下:{}", JSON.toJSONString(jsonMap, JSONWriter.Feature.PrettyFormatWith4Space));
        }
    }

    /**
     * Answers the exercises with the help of an AI model: screenshots each question, asks the
     * model for the answer, clicks the matching options and submits. Stops as soon as the model's
     * reply cannot be used.
     *
     * @param urls exercise ids, in processing order
     * @param page page to drive
     * @throws InterruptedException if interrupted while pausing between actions
     */
    private static void ai(String[] urls, Page page) throws InterruptedException {
        // NOTE(review): the loop starts at index 2, so the first two exercises are skipped —
        // presumably they had already been completed; confirm before reusing.
        for (int i = 2; i < urls.length; i++) {
            page.navigate(EXERCISE_BASE_URL + urls[i]);
            log.info("当前正在处理{}", describeExercise(i, urls.length));

            // Read the current question state.
            waitForExerciseReady(page);
            int currentIndex = readCurrentIndex(page);
            int totalIndex = readTotalCount(page);
            Locator questionDiv = page.locator(".question");

            // Handle the questions one by one.
            for (; currentIndex <= totalIndex; currentIndex++) {
                log.info("当前处理中 [{}/{}]", currentIndex, totalIndex);

                // Screenshot of the question area
                byte[] screenshotBytes = questionDiv.screenshot();

                // Ask the AI
                String screenshotBase64 = Base64.getEncoder().encodeToString(screenshotBytes);
                log.debug("截图base64:{}", screenshotBase64);
                String result = Gemini.getAnswer(screenshotBase64);
                log.info("此题答案:{}", result);
                // Besides over-long replies, reject the empty string (API error) and "NULL"
                // (unreadable image): acting on them would submit nothing or click bogus options.
                if (!isUsableAnswer(result)) {
                    log.error("AI出现错误!");
                    return;
                }

                // Tick every option named in the AI's answer.
                if (page.locator(".answerList span.panduan").count() == 2) {
                    log.info("此题为判断题");
                    // "A" (meaning correct) clicks the span carrying the `true` class,
                    // "B" (meaning wrong) clicks the span without it.
                    if (result.contains("A")) {
                        page.locator(".answerList span.panduan.true").click();
                    } else if (result.contains("B")) {
                        page.locator(".answerList span.panduan:not(.true)").click();
                    } else {
                        log.error("检测到判断题,但AI给出的答案无法理解!请人工干预!");
                        STDIN.nextLine();
                    }
                } else {
                    log.info("此题为选择题");
                    for (char choice : result.toCharArray()) {
                        // Anchor the match at the start of the option text. A plain string
                        // hasText filter is a case-insensitive substring match, so "A" would
                        // also match any option that merely contains the letter "a".
                        page.locator(".answerList span")
                                .filter(new Locator.FilterOptions().setHasText(
                                        java.util.regex.Pattern.compile("^\\s*" + choice)))
                                .first().click();
                        page.waitForTimeout(1000);
                    }
                }

                // Click the submit button if it is shown.
                Locator submitBtn = page.locator(".btnCon button.btn:has-text('提交')");
                if (submitBtn.isVisible()) {
                    submitBtn.click();
                    log.info("此题提交成功!");
                    Thread.sleep(1000);
                }

                // Turn the page, except after the last question.
                if (currentIndex != totalIndex) {
                    page.locator("i.iconfont.right.unselectable").click();
                    Thread.sleep(1000);
                }
            }
            Thread.sleep(1000);
            log.info("当前测试已答完");
            Thread.sleep(1000);
        }
        log.info("所有答题已完成!");
    }
}

其中我使用了launchPersistentContext方法来创建一个能持久化保存用户数据的浏览器环境,通过--disable-blink-features=AutomationControlled选项,并重新设置UA为正常浏览器的UA,并将stealth.min.js作为初始脚本注入,从而避免被大部分网站检测到自动化环境。

AI答题的逻辑大致是,通过对题目区域截图,并通过API扔给AI,让AI返回结果,根据结果选择相应选项。我的代码定义了两个函数 Gemini.getAnswer(base64_image) 和 Ollama.getAnswer(base64_image) 来把截图扔给AI。

AI这边需要一个能接受图像输入的模型,并且通过System角色的提示词来限制AI的输出。当时为了减少Token的支出,想的是使用Ollama跑本地模型。但经尝试,Qwen3.5:9b、Qwen3.5:35b、Qwen3.6:27b在处理部分题目时会输出混乱的字符串,应该是因为达到了token的限制之类的,我也不是很清楚,但不太好用。我先贴一个用于Ollama的类:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package xyz.denvo;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class Ollama {
private static final String BASE_URL = "http://localhost:11434/api/chat";
private static final String MODEL_ID = "qwen3.5:9b";

private static final Logger log = LoggerFactory.getLogger(Ollama.class);
private static final HttpClient client = HttpClient.newHttpClient();

public static String getAnswer(String base64_image) {
// 构建请求体
JSONObject requestBody = new JSONObject();
requestBody.put("model", MODEL_ID);
requestBody.put("stream", false);
// 构建 messages
JSONArray messages = new JSONArray();
// System
JSONObject systemMsg = new JSONObject();
systemMsg.put("role", "system");
systemMsg.put("content",
"""
### Role
你是一个精密的题目解析与数据提取引擎。

### Task
识别图片中的题目类型(单选题、多选题、判断题)并给出标准化的答案选项,以便程序直接解析。

### Output Format Rules (Strict Enforcement)
请根据题目类型,严格按下述格式输出,严禁任何解释、前缀或后缀:

1. **单选题 (Single Choice)**: 仅输出一个大写字母。
- 示例:A
2. **多选题 (Multiple Choice)**: 按字母顺序输出所有正确选项,选项间不加任何分隔符。
- 示例:ACD
3. **判断题 (True/False)**: 统一转换为 A 或 B 输出(A 代表正确/对,B 代表错误/错)。
- 示例:A
4. **无法识别**: 若图片无题目或无法辨认,仅输出:NULL

### Requirements
- 忽略所有解题过程,只给最终答案。
- 选项范围:单选 A-D,多选 A-E。
- 所有的输出必须为大写英文字母。
- 如果一张图包含多道题,请用逗号分隔(例如:A,ABD,B)。
""");
messages.add(systemMsg);
// User
JSONObject userMsg = new JSONObject();
userMsg.put("role", "user");
userMsg.put("content", "请给出这道题的正确选项(单选题仅输出一个大写字母,多选题输出所有符合的代表选项的大写字母,判断题输出A代表正确,B代表错误)。仅给出答案,不要输出任何其他内容");
JSONArray images = new JSONArray();
images.add(base64_image);
userMsg.put("images", images);
messages.add(userMsg);

requestBody.put("messages", messages);

// 向Ollama发送请求
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(BASE_URL))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(requestBody.toJSONString()))
.build();
try {
JSONObject responseJson = JSON.parseObject(client.send(request, HttpResponse.BodyHandlers.ofString()).body());
return responseJson.getJSONObject("message").getString("content")
.replaceAll("[^a-zA-Z]", "").toUpperCase().trim();
} catch (IOException | InterruptedException e) {
log.error("出现错误!", e);
return "";
}
}
}

不过后续我还是放弃了。本地AI模型的效果实在是不好。Google提供一些免费的API供我使用,于是我又为了对接Gemini而写了一个类:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package xyz.denvo;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ProxySelector;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class Gemini {
private static final String MODEL_ID = "gemini-2.5-pro";
private static final String API_KEY = "xxxxxxxx";
private static final String BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models/" + MODEL_ID + ":generateContent?key=" + API_KEY;

private static final Logger log = LoggerFactory.getLogger(Gemini.class);
private static final HttpClient client = HttpClient.newBuilder()
//.proxy(ProxySelector.of(new InetSocketAddress("127.0.0.1", 7897)))
.build();

public static String getAnswer(String base64_image) {
// 1. 构建 System Instruction
JSONObject systemInstruction = new JSONObject();
JSONArray systemParts = new JSONArray();
JSONObject systemText = new JSONObject();
systemText.put("text", """
### Role
你是一个精密的题目解析与数据提取引擎。

### Task
识别图片中的题目类型(单选题、多选题、判断题)并给出标准化的答案选项,以便程序直接解析。

### Output Format Rules (Strict Enforcement)
请根据题目类型,严格按下述格式输出,严禁任何解释、前缀或后缀:

1. **单选题 (Single Choice)**: 仅输出一个大写字母。
- 示例:A
2. **多选题 (Multiple Choice)**: 按字母顺序输出所有正确选项,选项间不加任何分隔符。
- 示例:ACD
3. **判断题 (True/False)**: 统一转换为 A 或 B 输出(A 代表正确/对,B 代表错误/错)。
- 示例:A
4. **无法识别**: 若图片无题目或无法辨认,仅输出:NULL

### Requirements
- 忽略所有解题过程,只给最终答案。
- 选项范围:单选 A-D,多选 A-E。
- 所有的输出必须为大写英文字母。
- 如果一张图包含多道题,请用逗号分隔(例如:A,ABD,B)。
""");
systemParts.add(systemText);
systemInstruction.put("parts", systemParts);

// 2. 构建 User Content (包含文本和图片)
JSONObject userContent = new JSONObject();
userContent.put("role", "user");
JSONArray userParts = new JSONArray();

// 文本部分
JSONObject userText = new JSONObject();
userText.put("text", "请给出这道题的正确选项(单选题仅输出一个大写字母,多选题输出所有符合的代表选项的大写字母,判断题输出A代表正确,B代表错误)。仅给出答案,不要输出任何其他内容");
userParts.add(userText);

// 图片部分 (Gemini 使用 inline_data 结构)
JSONObject imageData = new JSONObject();
JSONObject inlineData = new JSONObject();
inlineData.put("mime_type", "image/png");
inlineData.put("data", base64_image);
imageData.put("inline_data", inlineData);
userParts.add(imageData);

userContent.put("parts", userParts);

// 3. 构建完整的 Request Body
JSONObject requestBody = new JSONObject();
JSONArray contents = new JSONArray();
contents.add(userContent);

requestBody.put("contents", contents);
requestBody.put("system_instruction", systemInstruction);

// 4. 配置生成参数 (关闭随机性)
JSONObject generationConfig = new JSONObject();
generationConfig.put("temperature", 0.0);
requestBody.put("generationConfig", generationConfig);

// 5. 发送 HTTP 请求
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(BASE_URL))
.header("Content-Type", "application/json")
//.header("Authorization", "Bearer " + API_KEY)
.POST(HttpRequest.BodyPublishers.ofString(requestBody.toJSONString()))
.build();

try {
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());

if (response.statusCode() == 429) {
log.warn("Gemini API 触发速率限制,请增加脚本延时!");
return "";
}

JSONObject responseJson = JSON.parseObject(response.body());

log.debug("Gemini返回:{}", responseJson.toJSONString());

// Gemini 的响应路径: candidates[0].content.parts[0].text
String content = responseJson.getJSONArray("candidates")
.getJSONObject(0)
.getJSONObject("content")
.getJSONArray("parts")
.getJSONObject(0)
.getString("text");

// 清理并格式化输出
return content.replaceAll("[^a-zA-Z]", "").toUpperCase().trim();

} catch (IOException | InterruptedException | NullPointerException e) {
log.error("出现错误!", e);
return "";
}
}
}

但实测下来,免费的小模型的答题正确率偏低。而好用的模型(如gemini 3.1 pro)需要付费,所以也不好用。不过最后,通过一个中转站,通过Gemini格式的API,最终还是用上了GPT-5.5模型。最后才勉强达到了效果。

部分中转站鉴权时要求把API-key放在HTTP Header中。如果需要的话,可以把上面Gemini类中被注释掉的 .header("Authorization", "Bearer " + API_KEY) 这一行取消注释,从而把API-key放在Header中。

最后

其实很多人应该只需要前面的脚本,对后面的内容其实不太感冒? 如果觉得脚本好用的话,可以在评论区留言支持一下吗?感谢大家的支持!

  • 标题: UESTC离散数学自动答题脚本
  • 作者: Denvo
  • 创建于 : 2026-05-10 21:16:01
  • 更新于 : 2026-05-10 21:21:17
  • 链接: https://www.denvoshome.xyz/posts/UESTC_discrete_math_script/
  • 版权声明: 本文章采用 CC BY-SA 4.0 进行许可。
评论