// org-report-stats/parse_orgmode_to_json.mjs

/**
 * Convert an Org-mode (.org) file into structured JSON data.
 */
import fs from 'node-fs';
import moment from 'moment';


/**********************
 * initialize configs
 **********************/

// Name of the Org file to parse and its location relative to the working directory.
const sourceFileName = 'all_tasks.org';
const sourceFilePath = `./sources/${sourceFileName}`;

// Accumulators filled while parsing.
const headers = [];                    // cleaned headline texts, in file order
const tasksObjectsForJsonExport = [];  // finished task objects
const headersByKind = {};              // raw headline lines grouped by state keyword

// Toggle: write the JSON export to disk once parsing is done.
let writeJsonAfterParse = true;

/**************************************************************
 * fetch the source orgmode file to read its contents
 *************************************************************/

// Announce which file is about to be parsed and report whether it can be found.
console.log('parse some org file', sourceFilePath);
if (!sourceFilePath) {
    console.error('pas de fichier à ouvrir');
}
fs.stat(sourceFilePath, (statError) => {
    // No error object at all: the path could be stat'ed.
    if (statError == null) {
        console.log(`File ${sourceFilePath} exists`);
        return;
    }
    // ENOENT: the path does not exist.
    if (statError.code === 'ENOENT') {
        console.error(`le fichier ${sourceFilePath} est introuvable. Impossible d en extraire des infos.`, statError);
        return;
    }
    // Any other failure is only reported by its code.
    console.log('Some other error: ', statError.code);
});

/**********************
 * search elements
 *********************/
// Workflow states looked for at the start of a headline.
const stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING'];
// Keywords that label a timestamp found on the same line.
const dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled'];
// Drawer delimiters; lines containing them are not part of a task's free text.
const sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END'];

let propertiesSection = {} // TODO properties listing
let logBookSection = {} // TODO logbook listing

// Global counters: tag and word occurrences, plus one numeric counter per
// keyword added at the top level by the keyword helpers.
let statistics = {
    tags: {},
    words: {}
}

// Regex source matching any one state keyword. A non-capturing alternation is
// required here: square brackets would build a character class that matches a
// single letter instead of a whole keyword.
let headerKeywordSearch = '(?:' + stateKeywordList.join('|') + ')'
/**
 * Template describing the shape of one exported task.
 *
 * `level` starts as an empty string and is replaced by the number of leading
 * stars once a headline is read; `dates` is keyed by date keyword.
 *
 * @type {{header: string, level: (string|number), corpus: string, state: string, tags: string[], tagsInherited: string[], dates: Object<string, Date>, logbook: Object, properties: Object}}
 */
let task = {
    header: "",
    level: "",
    corpus: "",
    state: "",
    tags: [],
    tagsInherited: [],
    dates: {},
    logbook: {},
    properties: {},
}

// Parser state flags.
let isHeader = false;   // true while the line being examined is a headline
let isProperty = false;
let isLogbook = false;
let isFirst = true;     // true until the first headline has been seen

// First task object. Spreading the template copies only its top level, so every
// nested container is replaced by a fresh one; otherwise tags or dates written
// to this task would land in the template and leak into later copies of it.
let currentTask = {
    ...task,
    tags: [],
    tagsInherited: [],
    dates: {},
    logbook: {},
    properties: {},
};

/**
 * Push the task currently being built onto the export list, then start a new
 * empty task.
 *
 * The new task copies the scalar defaults of the `task` template but receives
 * its own empty containers: a plain spread is shallow, so without this every
 * task would share the same `tags` array and the same `logbook` / `properties`
 * objects.
 */
function addAndRefreshCurrentTask() {
    tasksObjectsForJsonExport.push(currentTask);
    currentTask = {
        ...task,
        tags: [],
        tagsInherited: [],
        dates: {},
        logbook: {},
        properties: {},
    };
}

/**
 * Count every word of a sentence into `statistics.words`.
 *
 * Words are separated by any run of whitespace, and empty tokens (produced by
 * leading, trailing or repeated spaces) are not counted.
 *
 * @param {string} sentence text to tokenise; an empty or missing value is ignored
 */
function makeWordsStatistics(sentence) {
    if (!sentence) {
        return;
    }
    const wordCounts = statistics.words;
    sentence.split(/\s+/).forEach(word => {
        if (word === '') {
            return;
        }
        // Only trust own properties: a word such as "constructor" would otherwise
        // read the inherited Object.prototype member and turn the count into NaN.
        const previousCount = Object.prototype.hasOwnProperty.call(wordCounts, word)
            ? wordCounts[word]
            : 0;
        wordCounts[word] = previousCount + 1;
    });
}

/**********************
 * loop to parse all
 *********************/
// Read the Org file, build one task object per headline, then assemble the
// JSON export (and write it when `writeJsonAfterParse` is set).
fs.readFile(sourceFilePath, 'utf8', function (err, data) {
    if (err) {
        // Nothing to parse: report on stderr and stop.
        console.error(err);
        return;
    }
    console.log(" parsing...")
    // Accept LF and CRLF line endings so no trailing "\r" ends up in headers or tags.
    const everyline = data.split(/\r?\n/);

    everyline.forEach((line) => {
        // A headline is one or more stars followed by a space; each one starts a new task.
        if (/^\*+ /.test(line)) {
            // Store the task that was being built, except before the very first headline.
            if (!isFirst) {
                addAndRefreshCurrentTask();
            } else {
                isFirst = false;
            }

            isHeader = true;
            // The level is the number of LEADING stars only; stars used later in the
            // title (e.g. *bold* markup) must not be counted.
            currentTask.level = line.match(/^\*+/)[0].length;

            const cleanedHeader = cleanHeader(line);
            headers.push(cleanedHeader);
            currentTask.header = cleanedHeader;
            makeWordsStatistics(cleanedHeader);

            // Called for every keyword: lineHasKeyword also records per-state statistics.
            stateKeywordList.forEach(keyword => {
                if (lineHasKeyword(line, keyword)) {
                    currentTask.state = keyword;
                }
            });

            // Tags: assign a fresh array instead of pushing, so that the task never
            // writes into an array shared with the template or with another task.
            const headerTags = extractHeaderTags(line);
            headerTags.forEach(tag => {
                // Own-property check so that a tag named like an Object.prototype
                // member still starts counting from zero.
                const previousCount = Object.prototype.hasOwnProperty.call(statistics.tags, tag)
                    ? statistics.tags[tag]
                    : 0;
                statistics.tags[tag] = previousCount + 1;
            });
            currentTask.tags = headerTags;
        } else {
            isHeader = false;
        }

        // Lines carrying a timestamp: attach it to every date keyword present on the line.
        const dateFound = searchDate(line);
        if (dateFound) {
            dateKeywordList.forEach(keyword => {
                if (!lineHasSubstring(line, keyword)) {
                    return;
                }
                // One line can hold several keywords, each followed by its own
                // timestamp, so look at the text after this keyword first and fall
                // back to the timestamp found for the whole line.
                const textAfterKeyword = line.slice(line.indexOf(keyword) + keyword.length);
                const parsedDate = parseOrgTimestamp(textAfterKeyword) || parseOrgTimestamp(dateFound[0]);
                if (parsedDate) {
                    currentTask.dates[keyword] = parsedDate;
                }
            });
            return;
        }

        // Remaining non-empty body lines form the task corpus, unless they mention a
        // date, state or drawer keyword.
        if (isHeader || line.length === 0) {
            return;
        }
        const mentionsKeyword = lineContainsAnyKeyword(line, dateKeywordList)
            || lineContainsAnyKeyword(line, stateKeywordList)
            || lineContainsAnyKeyword(line, sectionKeywordList);
        if (!mentionsKeyword) {
            makeWordsStatistics(line);
            // Collapse runs of spaces (indentation, alignment) before storing the line.
            currentTask.corpus += `${line.replace(/ {2,}/g, ' ')}\n`;
        }
    })
    // Store the last task that was being built.
    addAndRefreshCurrentTask();

    console.log(" parsing fini")

    // Only numeric entries of `statistics` are counters; `tags` and `words` are
    // nested maps and are ranked as 0 so that the comparator never yields NaN.
    const counterOf = (key) => (typeof statistics[key] === 'number' ? statistics[key] : 0);

    const jsonContent = {
        statistics: {
            lines_count: everyline.length,
            headers_count: headers.length,
            // NOTE(review): this exports the statistic NAMES ordered by ascending
            // count, not the counts themselves - confirm that is what consumers expect.
            statistics: Object.keys(statistics).sort((a, b) => counterOf(a) - counterOf(b))

        },
        meta_data: {
            author: '@tykayn@mastodon.Cipherbliss.com',
            generated_at: new Date(),
            // sourceFilePath already ends with the file name.
            generated_from_file: sourceFilePath,
            sources: 'https://forge.chapril.org/tykayn/org-report-stats.git'
        },
        tasks_list: tasksObjectsForJsonExport
    }

    console.log('statistics', statistics)

    if (writeJsonAfterParse) {
        writeJsonFile('export_' + sourceFileName + '_parsed.json', JSON.stringify(jsonContent));
    }

})

/**
 * Extract the tags written at the end of a headline as `:tag1:tag2:`.
 *
 * Only a colon-delimited group that ends the line and is preceded by whitespace
 * is considered, so colons inside the title (such as a time "10:30") are ignored.
 *
 * @param {string} line raw headline
 * @returns {string[]} tag names, possibly empty
 */
function extractHeaderTags(line) {
    const tagBlock = /\s(:(?:[^\s:]+:)+)\s*$/.exec(line);
    if (!tagBlock) {
        return [];
    }
    return tagBlock[1].split(':').filter(tag => tag.length > 0);
}

/**
 * Build a Date from the first `YYYY-MM-DD` (optionally followed by a short
 * day name and `HH:MM[:SS]`) found in a text.
 *
 * The numeric parts are passed to the Date constructor directly, giving local
 * time, rather than handing a string containing a day name to the
 * implementation-defined Date string parser.
 *
 * @param {string} text text that may contain a timestamp
 * @returns {Date|null} the parsed date, or null when no date is present
 */
function parseOrgTimestamp(text) {
    // The gap between date and time excludes digits and closing brackets and is
    // bounded, so a time belonging to a later timestamp is never picked up.
    const parts = /(\d{4})-(\d{2})-(\d{2})(?:[^\d\]>]{0,12}(\d{1,2}):(\d{2})(?::(\d{2}))?)?/.exec(text);
    if (!parts) {
        return null;
    }
    const [, year, month, day, hours = 0, minutes = 0, seconds = 0] = parts;
    return new Date(
        Number(year),
        Number(month) - 1, // Date months are zero-based
        Number(day),
        Number(hours),
        Number(minutes),
        Number(seconds)
    );
}

/**
 * Tell whether a line contains at least one keyword of a list (case-sensitive).
 *
 * @param {string} line
 * @param {string[]} keywords
 * @returns {boolean}
 */
function lineContainsAnyKeyword(line, keywords) {
    return keywords.some(keyword => line.includes(keyword));
}

/**
 * Tell whether a headline carries the given state keyword, and record it.
 *
 * The keyword must directly follow the leading stars and be a whole word, so
 * "* NEXTCLOUD setup" is not reported as NEXT and a keyword appearing later in
 * the title is not mistaken for the state.
 *
 * When the keyword is found, the raw line is appended to `headersByKind[keyword]`
 * and `statistics[keyword]` is incremented.
 *
 * @param {string} line raw headline
 * @param {string} [keyword='TODO'] state keyword to look for
 * @returns {boolean} true when the headline is in that state
 */
function lineHasKeyword(line, keyword = 'TODO') {
    const stars = /^\*+\s+/.exec(line);
    const title = stars ? line.slice(stars[0].length) : null;
    const isFound = title !== null
        && title.startsWith(keyword)
        // whole word: the keyword ends the line or is followed by whitespace
        && (title.length === keyword.length || /\s/.test(title[keyword.length]));

    if (isFound) {
        createNewHeaderKind(keyword);
        headersByKind[keyword].push(line);
        statistics[keyword] = (statistics[keyword] || 0) + 1;
    }
    return isFound;
}

/**
 * Tell whether a line contains a keyword anywhere (case-sensitive) and count
 * the hit in `statistics[keyword]`.
 *
 * The counter is only incremented when the keyword is actually present, in the
 * same way lineHasKeyword counts state keywords.
 *
 * @param {string} line line to inspect
 * @param {string} keyword text to look for
 * @returns {boolean} true when the keyword occurs in the line
 */
function lineHasSubstring(line, keyword) {
    const isFound = line.includes(keyword);
    if (isFound) {
        statistics[keyword] = (statistics[keyword] || 0) + 1;
    }
    return isFound;
}

/**
 * Make sure `headersByKind` has a list for the given keyword.
 * An existing list is left untouched.
 *
 * @param {string} keyword key under which headlines are grouped
 */
function createNewHeaderKind(keyword) {
    const alreadyRegistered = Boolean(headersByKind[keyword]);
    if (alreadyRegistered) {
        return;
    }
    headersByKind[keyword] = [];
}

/**
 * chercher des dates et heures au format
 * YYYY-MM-DD HH:II:SS
 *
 * @param line
 * @returns {*}
 */
function searchDate(line) {
    // return line.match(/[(\d{4}\-\d{2}\-\d{2} ?\d{2}?\:?\d{2}?\:?\d{2}?)(\d{4}\-\d{2}\-\d{2})]/)
    let simpleDay = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.?/)
    let simpleDayHour = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}/)
    let simpleDayHourSec = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}\:\d{2}/)

    if (simpleDayHourSec) {
        return simpleDayHourSec;
    }

    if (simpleDayHour) {
        return simpleDayHour;
    }
    if (simpleDay) {
        return simpleDay;
    }

}

/**
 * Compare two dates and keep the oldest one, so that the first date tied to a
 * task can be found among all the dates it mentions.
 *
 * A missing (null/undefined) or unparsable argument loses against the other
 * one. When both dates are equal, `date1` is returned.
 *
 * @param {*} date1 any value accepted by moment()
 * @param {*} date2 any value accepted by moment()
 * @returns {*} whichever of the two original arguments is the oldest
 */
function compareDatesAndKeepOldest(date1, date2) {
    // moment(undefined) means "now", so missing values are ruled out first.
    if (date1 == null) {
        return date2;
    }
    if (date2 == null) {
        return date1;
    }

    const first = moment(date1);
    const second = moment(date2);
    if (!first.isValid()) {
        return date2;
    }
    if (!second.isValid()) {
        return date1;
    }
    return second.isBefore(first) ? date2 : date1;
}

/**
 * Get the bare title of a headline: leading stars, the state keyword, the
 * trailing tags and any bracketed cookie (priority "[#A]", progress "[1/2]",
 * inline timestamps) are removed, and whitespace is normalised.
 *
 * @param {string} line raw headline
 * @returns {string} cleaned title, possibly empty
 */
function cleanHeader(line) {
    let title = String(line);

    // Leading stars and the whitespace that follows them.
    title = title.replace(/^\*+\s+/, '');

    // State keyword: only when it opens the title as a whole word, so that a
    // title such as "NEXTCLOUD setup" or "Review TODO list" is left intact.
    for (const keyword of stateKeywordList) {
        const rest = title.slice(keyword.length);
        if (title.startsWith(keyword) && (rest === '' || /^\s/.test(rest))) {
            title = rest;
            break;
        }
    }

    // Trailing ":tag1:tag2:" group only; colons inside the title (e.g. "10:30") stay.
    title = title.replace(/(?:^|\s+):[^\s:]+(?::[^\s:]+)*:\s*$/, '');

    // Each bracketed group separately: a greedy match would also delete the
    // text standing between two groups.
    title = title.replace(/\[[^\]]*\]/g, '');

    // Collapse the gaps left behind into single spaces instead of gluing words together.
    return title.replace(/\s{2,}/g, ' ').trim();
}

/**
 * Write a text payload to `./output/<fileName>` as UTF-8 and log the outcome.
 *
 * @param {string} fileName name of the file to create inside ./output
 * @param {string} fileContent text to write
 * @returns {*} whatever fs.writeFile returns
 */
function writeJsonFile(fileName, fileContent) {
    console.log('write file ', fileName);

    const destination = `./output/${fileName}`;

    // Completion callback: report failure or success on the console.
    const reportOutcome = (err) => {
        if (!err) {
            console.log(`File ${fileName} is written successfully!`);
            return;
        }
        console.log(`Error writing file: ${err}`);
    };

    return fs.writeFile(destination, fileContent, "utf8", reportOutcome);
}