rynn-k / gists
kbbi.js javascript
const axios = require('axios');
const cheerio = require('cheerio');
const FormData = require('form-data');

/**
 * Small scraping client for the KBBI website (kbbi.kemendikdasmen.go.id).
 *
 * `login()` submits the site's login form and returns the resulting cookies;
 * `search(word)` logs in, downloads the entry page for `word`, and extracts
 * the entries, derived words, compound words, proverbs and idioms from the
 * HTML.
 *
 * Relies on `axios`, `cheerio` and `FormData` being in scope in this file.
 */
class KBBI {
    static #BASE_URL = 'https://kbbi.kemendikdasmen.go.id';

    // Headings that start one dictionary entry (homograph) on the entry page.
    static #ENTRY_HEADING = "h2[style*='margin-bottom:3px']";

    #email;
    #password;

    /**
     * @param {object} [credentials] - Optional account override.
     * @param {string} [credentials.email] - Login e-mail (form field `Posel`).
     * @param {string} [credentials.password] - Login password (form field `KataSandi`).
     */
    constructor({ email = 'rynekoo@usako.net', password = 'Rynekoo2009' } = {}) {
        // NOTE(review): the defaults are credentials committed to source. They are
        // kept only so that `new KBBI()` behaves as before; prefer passing your own.
        this.#email = email;
        this.#password = password;
    }

    /**
     * Logs in through the site's login form.
     *
     * @returns {Promise<string>} Value for a `Cookie` request header, built from
     *     the cookies set by the login response.
     * @throws {Error} If a request fails, the form token is missing, or the login
     *     response sets no cookie. The original error is available as `cause`.
     */
    async login() {
        try {
            const loginUrl = `${KBBI.#BASE_URL}/Account/Login`;
            const { data, headers } = await axios.get(loginUrl);
            const $ = cheerio.load(data);

            const token = $('input[name="__RequestVerificationToken"]').attr('value');
            if (!token) throw new Error('Login failed: verification token not found in login form.');

            const form = new FormData();
            form.append('__RequestVerificationToken', token);
            form.append('Posel', this.#email);
            form.append('KataSandi', this.#password);
            form.append('IngatSaya', 'true');

            // Cookies handed out with the form are sent back together with the token.
            const formCookies = KBBI.#toCookieHeader(headers['set-cookie']);
            const { headers: loginHeaders } = await axios.post(loginUrl, form, {
                headers: {
                    ...(formCookies ? { cookie: formCookies } : {}),
                    ...form.getHeaders()
                },
                // Do not follow the redirect: the cookies are read from this response.
                maxRedirects: 0,
                validateStatus: (status) => status >= 200 && status < 400
            });

            const sessionCookies = KBBI.#toCookieHeader(loginHeaders['set-cookie']);
            if (!sessionCookies) throw new Error('Login failed: no session cookie returned.');

            return sessionCookies;
        } catch (error) {
            throw new Error(error.message, { cause: error });
        }
    }

    /**
     * Looks up `word` and scrapes its entry page.
     *
     * @param {string} word - Word to look up; it is URL-encoded before use.
     * @returns {Promise<{kata: object[], peribahasa: string[], idiom: string[]}>}
     *     `kata` holds one `{ [entryKey]: details }` object per entry heading,
     *     where `details` has `makna`, `kata_tidak_baku`, `kata_turunan` and
     *     `gabungan_kata`.
     * @throws {Error} If `word` is empty, or login / download fails. The original
     *     error is available as `cause`.
     */
    async search(word) {
        try {
            if (!word) throw new Error('Word is required.');

            const cookies = await this.login();
            const entryUrl = `${KBBI.#BASE_URL}/entri/${encodeURIComponent(word)}`;
            const { data } = await axios.get(entryUrl, {
                headers: {
                    cookie: cookies
                }
            });
            const $ = cheerio.load(data);

            const kata = [];
            $(KBBI.#ENTRY_HEADING).each((index, element) => {
                kata.push(KBBI.#parseEntry($, $(element)));
            });

            return {
                kata,
                peribahasa: KBBI.#collectGlobalList($, 'Peribahasa', word),
                idiom: KBBI.#collectGlobalList($, 'Idiom', word)
            };
        } catch (error) {
            throw new Error(error.message, { cause: error });
        }
    }

    /**
     * Turns `Set-Cookie` response values into a `Cookie` request header value,
     * keeping only each cookie's `name=value` pair and dropping its attributes.
     */
    static #toCookieHeader(setCookie) {
        return [setCookie ?? []]
            .flat()
            .map((line) => String(line).split(';', 1)[0].trim())
            .filter(Boolean)
            .join('; ');
    }

    /** Returns the trimmed text of every `li a` link inside `$list`. */
    static #linkTexts($, $list) {
        const texts = [];
        $list.find('li a').each((index, el) => {
            texts.push($(el).text().trim());
        });
        return texts;
    }

    /**
     * Parses one list item of an entry's meaning list.
     * Returns null for the "Usulkan makna baru" item and for items lacking
     * either a word class or a description.
     */
    static #parseMeaning($li) {
        if ($li.find("a.entrisButton span[title='Usulkan makna baru']").length > 0) {
            return null;
        }

        const kelas_kata = $li.find('span[title]').attr('title');

        // Work on a copy so the removals below do not alter the loaded document.
        const $clone = $li.clone();
        $clone.find("font[color='red'] > i > span[title]").closest('font').remove();
        $clone.find('span.entrisButton').remove();

        const deskripsi = $clone.text().trim().replace(/\s+/g, ' ');
        if (!kelas_kata || !deskripsi) return null;

        return { kelas_kata, deskripsi };
    }

    /** Parses the entry that starts at `$heading` into `{ [entryKey]: details }`. */
    static #parseEntry($, $heading) {
        const $headingClone = $heading.clone();

        // The non-standard spelling sits inside the heading; read it, then remove
        // it from the copy so it does not end up in the entry key.
        const $nonStandard = $headingClone.find("small:contains('bentuk tidak baku:')");
        let kataTidakBaku = null;
        if ($nonStandard.length > 0) {
            kataTidakBaku = $nonStandard.find('b').text().trim() || null;
            $nonStandard.remove();
        }

        // Every run of digits in the heading text is prefixed with '^'.
        const wordKey = $headingClone.text().trim().replace(/(\d+)/g, '^$1');

        const details = {
            makna: [],
            kata_tidak_baku: kataTidakBaku,
            kata_turunan: [],
            gabungan_kata: []
        };

        const $meanings = $heading.nextAll('ul.adjusted-par, ol.last-list-child').first();
        $meanings.find('li').each((index, li) => {
            const meaning = KBBI.#parseMeaning($(li));
            if (meaning) details.makna.push(meaning);
        });

        // Walk the siblings after the meaning list until the next entry heading or
        // a Peribahasa/Idiom heading, picking up the derived/compound word lists.
        let $sibling = $meanings.next();
        while (
            $sibling.length > 0 &&
            !$sibling.is(KBBI.#ENTRY_HEADING) &&
            !$sibling.is("h4:contains('Peribahasa')") &&
            !$sibling.is("h4:contains('Idiom')")
        ) {
            if (!$sibling.is('h4')) {
                $sibling = $sibling.next();
                continue;
            }

            const title = $sibling.text().trim();
            const $list = $sibling.nextAll('ul.adjusted-par').first();
            const items = KBBI.#linkTexts($, $list);

            if (items.length > 0) {
                if (title.includes('Kata Turunan')) {
                    details.kata_turunan = items;
                } else if (title.includes('Gabungan Kata')) {
                    details.gabungan_kata = items;
                }
            }

            // Resume after the list that was just consumed, if there was one.
            $sibling = $list.length > 0 ? $list.next() : $sibling.next();
        }

        return { [wordKey]: details };
    }

    /**
     * Collects the links listed under the last `h4` whose text contains both
     * `title` and "(mengandung [word])". Returns [] when there is no such heading.
     */
    static #collectGlobalList($, title, word) {
        const marker = `(mengandung [${word}])`;
        const $heading = $(`h4:contains('${title}')`)
            .filter((index, el) => $(el).text().includes(marker))
            .last();

        if ($heading.length === 0) return [];

        return KBBI.#linkTexts($, $heading.nextAll('ul.adjusted-par').first());
    }
};

// Usage: look up one word and print the scraped result.
const k = new KBBI();
k.search('orang')
    .then(console.log)
    .catch((error) => {
        // Report the failure instead of leaving an unhandled promise rejection.
        console.error(error);
        process.exitCode = 1;
    });
7277 bytes · Updated Mar 6, 2026