const axios = require('axios');
const cheerio = require('cheerio');
const FormData = require('form-data');
/** Origin of the KBBI site; every request made by this class goes here. */
const KBBI_BASE_URL = 'https://kbbi.kemendikdasmen.go.id';
const KBBI_LOGIN_URL = `${KBBI_BASE_URL}/Account/Login`;

/** Selector for the heading that opens each homograph entry on a result page. */
const KBBI_ENTRY_HEADING = "h2[style*='margin-bottom:3px']";

// NOTE(review): these account credentials are committed to source control.
// They remain the defaults so existing `new KBBI()` callers keep working, but
// they should be moved to configuration and overridden via the constructor.
const KBBI_DEFAULT_EMAIL = 'rynekoo@usako.net';
const KBBI_DEFAULT_PASSWORD = 'Rynekoo2009';

/**
 * Builds a `Cookie` request-header value from `Set-Cookie` response headers.
 *
 * Only the leading `name=value` pair of each line is kept; attributes such as
 * `path`, `expires` or `HttpOnly` belong to the response and are dropped.
 *
 * @param {string[]|string|undefined} setCookie - Raw `Set-Cookie` header line(s).
 * @returns {string} Pairs joined with `'; '`, or `''` when there are none.
 */
function toCookieHeader(setCookie) {
  const lines = Array.isArray(setCookie) ? setCookie : [setCookie];
  return lines
    .filter((line) => typeof line === 'string')
    .map((line) => line.split(';', 1)[0].trim())
    .filter((pair) => pair !== '')
    .join('; ');
}

/**
 * Collects the trimmed text of every `li a` link inside a list selection.
 *
 * @param {Function} $ - Cheerio instance loaded with the result page.
 * @param {object} $list - Cheerio selection of the list (may be empty).
 * @returns {string[]} Link texts in document order.
 */
function collectLinkTexts($, $list) {
  return $list
    .find('li a')
    .toArray()
    .map((link) => $(link).text().trim());
}

/**
 * Extracts `{ kelas_kata, deskripsi }` pairs from an entry's meaning list.
 *
 * @param {Function} $ - Cheerio instance loaded with the result page.
 * @param {object} $list - Cheerio selection of the meaning list (may be empty).
 * @returns {{kelas_kata: string, deskripsi: string}[]} Parsed meanings.
 */
function parseMeanings($, $list) {
  const makna = [];
  $list.find('li').each((_, item) => {
    const $item = $(item);
    // Skip the trailing "Usulkan makna baru" (suggest a new meaning) item.
    if ($item.find("a.entrisButton span[title='Usulkan makna baru']").length > 0) {
      return;
    }
    const kelas_kata = $item.find('span[title]').attr('title');
    const $text = $item.clone();
    // Drop the red word-class label and the edit buttons before reading text.
    $text.find("font[color='red'] > i > span[title]").closest('font').remove();
    $text.find('span.entrisButton').remove();
    // Read the text directly: the former single-quoted `<font color='…'>`
    // regexes were run on cheerio-serialized HTML (which quotes attributes
    // with double quotes), and markup is discarded by text() in any case.
    const deskripsi = $text.text().trim().replace(/\s+/g, ' ');
    if (kelas_kata && deskripsi) {
      makna.push({ kelas_kata, deskripsi });
    }
  });
  return makna;
}

/**
 * Parses one homograph entry, starting from its heading element.
 *
 * @param {Function} $ - Cheerio instance loaded with the result page.
 * @param {object} $heading - Cheerio selection of the entry's `h2`.
 * @returns {Object<string, {makna: object[], kata_tidak_baku: (string|null),
 *   kata_turunan: string[], gabungan_kata: string[]}>} Single-key object whose
 *   key is the heading text with every digit run prefixed by `^`.
 */
function parseEntry($, $heading) {
  // Work on a clone so removing the "bentuk tidak baku" note from the title
  // does not alter the document being traversed.
  const $title = $heading.clone();
  const $nonStandard = $title.find("small:contains('bentuk tidak baku:')");
  let kataTidakBaku = null;
  if ($nonStandard.length > 0) {
    kataTidakBaku = $nonStandard.find('b').text().trim();
    $nonStandard.remove();
  }
  const wordKey = $title.text().trim().replace(/(\d+)/g, '^$1');

  const $meanings = $heading.nextAll('ul.adjusted-par, ol.last-list-child').first();
  const details = {
    makna: parseMeanings($, $meanings),
    kata_tidak_baku: kataTidakBaku || null,
    kata_turunan: [],
    gabungan_kata: [],
  };

  // Walk the siblings after the meaning list until the next entry heading or
  // the page-wide Peribahasa/Idiom sections, picking up the per-entry
  // "Kata Turunan" and "Gabungan Kata" lists on the way.
  let $node = $meanings.next();
  while (
    $node.length > 0 &&
    !$node.is(KBBI_ENTRY_HEADING) &&
    !$node.is("h4:contains('Peribahasa')") &&
    !$node.is("h4:contains('Idiom')")
  ) {
    if (!$node.is('h4')) {
      $node = $node.next();
      continue;
    }
    const title = $node.text().trim();
    const $list = $node.nextAll('ul.adjusted-par').first();
    const items = collectLinkTexts($, $list);
    if (items.length > 0) {
      if (title.includes('Kata Turunan')) {
        details.kata_turunan = items;
      } else if (title.includes('Gabungan Kata')) {
        details.gabungan_kata = items;
      }
    }
    // Resume after the consumed list so its items are not visited again.
    $node = $list.length > 0 ? $list.next() : $node.next();
  }

  return { [wordKey]: details };
}

/**
 * Reads a page-wide section (Peribahasa or Idiom) that belongs to `word`.
 *
 * @param {Function} $ - Cheerio instance loaded with the result page.
 * @param {string} label - Text the section's `h4` must contain.
 * @param {string} word - The searched word, as passed to `search`.
 * @returns {string[]} Link texts of the section's list, or `[]` if absent.
 */
function parseGlobalSection($, label, word) {
  const marker = `(mengandung [${word}])`;
  const $heading = $(`h4:contains('${label}')`)
    .filter((_, el) => $(el).text().includes(marker))
    .last();
  return collectLinkTexts($, $heading.nextAll('ul.adjusted-par').first());
}

/**
 * Client that logs in to the KBBI site and scrapes dictionary entries.
 */
class KBBI {
  #email;
  #password;

  /**
   * @param {object} [credentials] - Account used for logging in.
   * @param {string} [credentials.email] - Value posted as `Posel`.
   * @param {string} [credentials.password] - Value posted as `KataSandi`.
   */
  constructor({ email = KBBI_DEFAULT_EMAIL, password = KBBI_DEFAULT_PASSWORD } = {}) {
    this.#email = email;
    this.#password = password;
  }

  /**
   * Logs in and returns the cookies set by the login response.
   *
   * @returns {Promise<string>} `Cookie` header value (`name=value; ...`).
   * @throws {Error} When a request fails, or the login page/response lacks the
   *   expected token or cookies. The original error is attached as `cause`.
   */
  async login() {
    try {
      const { data, headers } = await axios.get(KBBI_LOGIN_URL);
      const $ = cheerio.load(data);

      const token = $('input[name="__RequestVerificationToken"]').attr('value');
      if (!token) {
        throw new Error('Login page did not contain a request verification token.');
      }
      const pageCookies = toCookieHeader(headers['set-cookie']);
      if (!pageCookies) {
        throw new Error('Login page did not set any cookies.');
      }

      const form = new FormData();
      form.append('__RequestVerificationToken', token);
      form.append('Posel', this.#email);
      form.append('KataSandi', this.#password);
      form.append('IngatSaya', 'true');

      const response = await axios.post(KBBI_LOGIN_URL, form, {
        headers: {
          cookie: pageCookies,
          ...form.getHeaders(),
        },
        // Do not follow the post-login redirect: the cookies of interest are
        // on the login response itself, so 3xx must count as success.
        maxRedirects: 0,
        validateStatus: (status) => status >= 200 && status < 400,
      });

      const sessionCookies = toCookieHeader(response.headers['set-cookie']);
      if (!sessionCookies) {
        throw new Error('Login response did not set any cookies.');
      }
      return sessionCookies;
    } catch (error) {
      throw new Error(error.message, { cause: error });
    }
  }

  /**
   * Looks up `word` and parses the result page.
   *
   * @param {string} word - Entry to look up; it is URL-encoded before use, so
   *   pass the plain word (e.g. `'orang tua'`), not a pre-encoded one.
   * @returns {Promise<{kata: object[], peribahasa: string[], idiom: string[]}>}
   *   `kata` holds one single-key object per homograph entry; `peribahasa`
   *   and `idiom` hold the link texts of the sections marked as containing
   *   `word`.
   * @throws {Error} `'Word is required.'` when `word` is falsy, or the message
   *   of any login/request failure. The original error is attached as `cause`.
   */
  async search(word) {
    try {
      if (!word) throw new Error('Word is required.');
      const cookies = await this.login();
      const { data } = await axios.get(`${KBBI_BASE_URL}/entri/${encodeURIComponent(word)}`, {
        headers: {
          cookie: cookies,
        },
      });
      const $ = cheerio.load(data);

      const kata = $(KBBI_ENTRY_HEADING)
        .toArray()
        .map((heading) => parseEntry($, $(heading)));

      return {
        kata,
        peribahasa: parseGlobalSection($, 'Peribahasa', word),
        idiom: parseGlobalSection($, 'Idiom', word),
      };
    } catch (error) {
      throw new Error(error.message, { cause: error });
    }
  }
}
// Demo run: look up "orang" and print the parsed result. A rejection handler
// is attached so a failed login/request is reported explicitly and the
// process exits with a failing status instead of leaving the promise floating.
const k = new KBBI();
k.search('orang')
  .then(console.log)
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });