1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
|
import { readFile } from "fs/promises";
import { z } from "zod";
const zodType = z.array(
z.object({
articleId: z.number().int().min(0),
title: z.string(),
timestampMs: z.number().int().min(0),
maxPageNumber: z.number().int().min(1),
tags: z.array(z.string()),
breadLinks: z.array(z.string()).min(1),
})
);
const MAX_ITEM_LIMIT = process.env["AKIBA_SOUKEN_MAX_ITEM_LIMIT"];
export class ArticleLoader {
static instance = new ArticleLoader();
private constructor() { }
private dataCache: Awaited<ReturnType<ArticleLoader["_loadData"]>> | null = null;
async loadData() {
if (this.dataCache != null) {
return this.dataCache;
}
const loadedData = await this._loadData();
this.dataCache = loadedData;
return loadedData;
}
private async _loadData() {
const articleJsonPath = process.env["AKIBA_SOUKEN_ARTICLE_JSON"]!;
const jsonStr = await readFile(articleJsonPath, { encoding: "utf-8" }).then(text => {
const jsonObj = JSON.parse(text);
return jsonObj;
});
const parsedObj = zodType.parse(jsonStr);
parsedObj.sort((a, b) => {
return b.timestampMs - a.timestampMs;
});
if (MAX_ITEM_LIMIT != null) {
parsedObj.length = Math.min(Number(MAX_ITEM_LIMIT), parsedObj.length);
}
return parsedObj;
}
async getCategoryList() {
const loadedData = await this.loadData();
// key:カテゴリ名 , val:回数
const categoryCount = new Map<string, number>();
for (const a of loadedData) {
if (a.breadLinks.length == 0) {
continue;
}
const category = a.breadLinks[0];
if (categoryCount.has(category)) {
categoryCount.set(category, categoryCount.get(category)! + 1);
} else {
categoryCount.set(category, 1);
}
}
const categoryList: { name: string, count: number }[] = [];
for (const [name, count] of categoryCount) {
categoryList.push({ name: name, count });
}
categoryList.sort((a, b) => {
return b.count - a.count;
});
return categoryList;
}
/**
*
* @returns [{tag:"タグ名",category:string,count:100}] の値。カテゴリに属していないタグはカテゴリが空文字。
* カテゴリ自体を指す場合は {tag:"ホビー",category:"ホビー",count:100} の様に同じ値が入る事がある。
* ソート順は未定義
*/
async getTagList() {
const loadedData = await this.loadData();
// 先にパンくずリストを全部見る
const breadcrumb = new Breadcrumb();
for (const a of loadedData) {
breadcrumb.push(a.breadLinks);
}
// key:タグ名 , val:回数
const tagCount = new Map<string, number>();
// パンくずリストに含まれないタグを調査
for (const a of loadedData) {
for (const tag of a.tags) {
if (breadcrumb.strExists(tag)) {
continue;
}
const tagData = tagCount.get(tag);
if (tagData != null) {
tagCount.set(tag, tagData + 1);
} else {
tagCount.set(tag, 1);
}
}
}
const result: { tag: string, category: string, count: number }[] = [];
for (const [name, count] of tagCount) {
result.push({ tag: name, count: count, category: "" });
}
for (const b of breadcrumb.getFlatArray()) {
const breadcrumbName = b.breadcrumb[b.breadcrumb.length - 1];//パンくずリストの最後の項目
const category = b.breadcrumb[0];//カテゴリ名
result.push({ tag: breadcrumbName, count: b.count, category: category });
}
return result;
}
}
type BreadcrumbInternal = { name: string, count: number, child: BreadcrumbInternal[] };
/**
* パンくずリストを管理
* ・各記事には最低2個以上のパンくずリストがある
* ・ホビー>バンダイ>S.H.Figuarts の様に親子関係がある
* ・文字は一箇所でしか使われない。例えば、バンダイという文字はホビーの下にしか存在しない。
*/
class Breadcrumb {
private set = new Set<string>();
private data: BreadcrumbInternal[] = [];
strExists(str: string) {
return this.set.has(str);
}
push(breadLinks: string[]) {
if (breadLinks.length == 0) {
throw new Error(`配列が0個です`);
}
breadLinks.forEach(b => {
this.set.add(b);
})
this.setChild(
this.data,
breadLinks,
);
}
getFlatArray() {
function hoge(dataList: BreadcrumbInternal[], parentBreadcrumbList: string[]) {
const sortedDataList = [...dataList].toSorted((a, b) => {
return a.name.toLowerCase().localeCompare(b.name.toLowerCase());
});
const result: { breadcrumb: string[], count: number }[] = [];
for (const v of sortedDataList) {
if (v.child.length == 0) {
if (v.count == 0) {
throw new Error(`子が0個で、個数も0はありえない`);
} else {
result.push({ breadcrumb: [...parentBreadcrumbList, v.name], count: v.count });
}
} else {
if (v.count != 0) {
result.push({ breadcrumb: [...parentBreadcrumbList, v.name], count: v.count });
}
const childResult = hoge(v.child, [...parentBreadcrumbList, v.name]);
childResult.forEach(c => {
result.push(c);
})
}
}
return result;
}
const result = hoge(this.data, []);
return result;
}
private setChild(pushTarget: BreadcrumbInternal[], breadLinks: string[]) {
const [top, ...nextBreadLinks] = breadLinks;
const isLast = breadLinks.length == 1;
const data = pushTarget.find(d => d.name == top);
if (data) {
if (isLast) {
data.count += 1;
} else {
data.count += 1;
this.setChild(data.child, nextBreadLinks)
}
} else {
if (isLast) {
pushTarget.push({ name: top, child: [], count: 1 });
} else {
const newItem: BreadcrumbInternal = this.createChild(nextBreadLinks)
pushTarget.push({ name: top, child: [newItem], count: 0 });
}
}
}
private createChild(nextBreadLinks: string[]): BreadcrumbInternal {
if (nextBreadLinks.length == 0) {
throw new Error();
} else if (nextBreadLinks.length == 1) {
return { child: [], count: 1, name: nextBreadLinks[0] } satisfies BreadcrumbInternal;
} else {
const [nowBreadLink, ...nextNextBreadLinks] = nextBreadLinks;
const child = this.createChild(nextNextBreadLinks);
return { child: [child], count: 0, name: nowBreadLink } satisfies BreadcrumbInternal;
}
}
}
|