Hugo博客文章过长导致search文件无法上传到algolia的问题

问题#

我的Hugo博客使用algolia进行搜索：在构建Hugo生成静态文件后，将search.json上传到algolia中，就可以进行搜索。现在出现了一个小问题：我有一篇文章比较长，导致上传时单条记录超过了algolia免费套餐的大小限制（10000字节），报错如下

1
/opt/1panel/www/sites/cbba-top/index/blog-FixIt/node_modules/atomic-algolia/lib/update.js:69
2
          if (err) throw err;
3
                   ^
4
AlgoliaSearchError: Record at the position 1 objectID=/posts/555833d/:0:0 is too big size=10236/10000 bytes. Please have a look at https://www.algolia.com/doc/guides/sending-and-managing-data/prepare-your-data/in-depth/index-and-records-size-and-usage-limitations/#record-size-limits
5
    at success (/opt/1panel/www/sites/cbba-top/index/blog-FixIt/node_modules/algoliasearch/src/AlgoliaSearchCore.js:377:32)
6
    at process._tickCallback (internal/process/next_tick.js:68:7)
7
npm ERR! code ELIFECYCLE
8
npm ERR! errno 1
9
npm ERR! blog-fixit@1.0.0 algolia: `atomic-algolia`
10
npm ERR! Exit status 1
11
npm ERR!
12
npm ERR! Failed at the blog-fixit@1.0.0 algolia script.
13
npm ERR! This is probably not a problem with npm. There is likely additional logging output above.
14

15
npm ERR! A complete log of this run can be found in:
16
npm ERR!     /root/.npm/_logs/2025-07-27T14_49_03_656Z-debug.log

解决思路有两个：

生成静态文件后，编写一个脚本将search.json中超出10k的文章拆分成多个algolia记录
生成静态文件后，编写一个脚本将search.json中超出10k的文章内容截断，只保留限制以内的部分

于是我测试了一下hugo使用algolia在网页中搜索好不好用，测试下来发现还行，但涉及到某个细微的点时就容易搜索不到。比如我在某篇文章中顺带提到了别的技术，却想不起来是哪篇文章，只能靠搜索，这时网页搜索大概率是搜不出来的，还是要靠本地搜索。既然网页搜索本来就做不到精确的全文检索，所以我选择在生成search.json后将文章过长的部分截断

通过truncate.js截断search.json#

以下是针对Algolia记录大小限制的截断代码truncate.js，实现按字节数检测超长内容并截断

在hugo主目录创建truncate.js文件，内容如下：

1
const fs = require('fs');
2
const path = require('path');
3

4
// Search index to read; a relative path, so it is resolved against the current working directory.
const INPUT_FILE = 'search.json';
5
// File the truncated index is written to (also relative to the working directory).
const OUTPUT_FILE = 'search-truncated.json';
6
const MAX_BYTES = 9000; // Per-record content budget in UTF-8 bytes; kept below Algolia's 10000-byte record limit to leave headroom
7

8
// UTF-8 字节计算函数
9
/**
 * Returns the number of bytes `str` occupies when encoded as UTF-8.
 *
 * @param {string} str - Text to measure.
 * @returns {number} UTF-8 byte length of `str`.
 */
function getByteLength(str) {
  const utf8Size = Buffer.byteLength(str, 'utf8');
  return utf8Size;
}
12

13
// 截断内容，使其不超过 MAX_BYTES 字节
14
/**
 * Truncates a string so that its UTF-8 encoding is at most `maxBytes` bytes.
 *
 * The string is walked by Unicode code point, not by UTF-16 code unit, so a
 * character outside the Basic Multilingual Plane (for example an emoji) is
 * either kept whole or dropped. Indexing with `str[i]` would measure each half
 * of a surrogate pair separately and could leave a lone surrogate at the end.
 *
 * @param {string} str - Text to truncate.
 * @param {number} maxBytes - Maximum UTF-8 byte length of the result.
 * @returns {string} The longest prefix of `str`, made of whole code points,
 *   whose UTF-8 encoding fits in `maxBytes`.
 */
function truncateToBytes(str, maxBytes) {
  let bytes = 0;
  // Index, in UTF-16 code units, just past the last code point that still fits.
  let end = 0;

  // for...of iterates by code point, which keeps surrogate pairs together.
  for (const char of str) {
    const charByteLength = Buffer.byteLength(char, 'utf8');
    if (bytes + charByteLength > maxBytes) {
      break;
    }
    bytes += charByteLength;
    end += char.length;
  }

  // One slice instead of repeated concatenation.
  return str.slice(0, end);
}
32

33
// Entry point: read the search index from INPUT_FILE, truncate the `content`
// of every record whose UTF-8 size exceeds MAX_BYTES, and write the resulting
// array to OUTPUT_FILE. Any failure is reported and turned into a non-zero
// exit status.
try {
  const rawData = fs.readFileSync(INPUT_FILE, 'utf8');
  const records = JSON.parse(rawData);

  // Fail with a clear message rather than "records.map is not a function".
  if (!Array.isArray(records)) {
    throw new TypeError(`${INPUT_FILE} must contain a JSON array of records`);
  }

  console.log(`✅ 成功加载 ${records.length} 条记录`);

  const processed = records.map((item, index) => {
    // A missing or empty content field counts as zero bytes.
    const contentBytes = getByteLength(item.content || '');
    // Fallback label so the log line stays readable for untitled records.
    const title = item.title || `第${index + 1}条记录（无标题）`;

    if (contentBytes > MAX_BYTES) {
      const truncated = truncateToBytes(item.content, MAX_BYTES);
      console.log(`✂️ 截断处理，标题: ${title}，原字节数: ${contentBytes} -> 截断后: ${getByteLength(truncated)} 字节`);
      // Copy the record so the parsed input is left untouched.
      return { ...item, content: truncated };
    }

    console.log(`👍 无需截断，标题: ${title}，字节数: ${contentBytes}`);
    return item;
  });

  fs.writeFileSync(OUTPUT_FILE, JSON.stringify(processed, null, 2), 'utf8');
  console.log(`✅ 写入成功: ${OUTPUT_FILE}`);
} catch (err) {
  console.error(`❌ 错误: ${err.message}`);
  // Report failure to the caller; otherwise the process exits 0 and a calling
  // script cannot tell that OUTPUT_FILE was not (re)generated.
  process.exitCode = 1;
}

修改deploy.sh文件

将上传的文件由public/search.json改为主目录中新生成的search-truncated.json文件
添加执行截断脚本的指令

1
#!/bin/bash
# Build the Hugo site, truncate oversized search records, and upload the
# resulting index file to Algolia.

# Stop at the first failing step so a broken build or a failed truncation is
# never followed by an upload.
set -euo pipefail

# The admin key is a secret and must not be stored in this script; it is taken
# from the caller's environment, and the script aborts if it is missing.
: "${ALGOLIA_ADMIN_KEY:?ALGOLIA_ADMIN_KEY must be set in the environment}"

# Build the Hugo site
hugo

# Truncate records that exceed Algolia's record size limit
node truncate.js

# Upload to Algolia
ALGOLIA_APP_ID=ZSH625MG3A \
ALGOLIA_ADMIN_KEY="$ALGOLIA_ADMIN_KEY" \
ALGOLIA_INDEX_NAME=index.zh-cn \
ALGOLIA_INDEX_FILE=search-truncated.json \
npm run algolia