/**
 * org-report-stats/parse_orgmode_to_json.mjs
 *
 * Convert an Org-mode (.org) file into structured JSON data.
 */
import fs from 'node-fs';
2023-03-05 23:13:16 +01:00
import moment from 'moment';
/**********************
 * initialize configs
 **********************/
// Name of the Org file to parse; it is looked up inside ./sources/.
const sourceFileName = 'demo_more.org';
const sourceFilePath = './sources/' + sourceFileName;
// Cleaned text of every header found, in file order.
let headers = [];
// Task objects accumulated while parsing; exported under `tasks_list`.
let tasksObjectsForJsonExport = [];
// Raw header lines grouped by the state keyword they carry.
let headersByKind = {};
// When true, the parsed result is written as JSON once parsing is done.
let writeJsonAfterParse = true;
2023-03-04 23:52:46 +01:00
2023-03-05 23:13:16 +01:00
/**************************************************************
 * fetch the source orgmode file to read its contents
 *************************************************************/
console.log('parse some org file', sourceFilePath)
// sourceFilePath always starts with './sources/', so it can never be empty:
// the meaningful check is on the file name itself.
if (!sourceFileName) {
  console.error('pas de fichier à ouvrir')
}
// Report whether the source file is reachable; this only logs, it does not stop the script.
fs.stat(sourceFilePath, function (err) {
  if (err == null) {
    console.log(`File ${sourceFilePath} exists`);
    return;
  }
  if (err.code === 'ENOENT') {
    // file does not exist
    console.error(`le fichier ${sourceFilePath} est introuvable. Impossible d en extraire des infos.`, err);
    return;
  }
  console.log('Some other error: ', err.code);
});
2023-03-05 23:13:16 +01:00
/**********************
 * search elements
 *********************/
// Keywords that mark the state of a task in a header line.
let stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING'];
// Keywords that label a date attached to a task.
let dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled'];
// Keywords that delimit drawers; lines carrying them are not task body text.
let sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END'];
let propertiesSection = {}; // TODO properties listing
let logBookSection = {}; // TODO logbook listing
// Counters filled during parsing: per tag, per word, plus one top-level entry per keyword seen.
let statistics = {
  tags: {},
  words: {}
};

// Regex source matching any one state keyword. A group is required here:
// square brackets would build a character class matching single letters.
let headerKeywordSearch = '(' + stateKeywordList.join('|') + ')';
/**
 * Template describing the shape of a parsed task.
 * Its nested containers must never be handed to a live task, otherwise every
 * task would share (and mutate) the same arrays and objects.
 * @type {{header: string, level: string, corpus: string, state: string, tags: string[], tagsInherited: string[], dates: Object, logbook: Object, properties: Object}}
 */
let task = {
  header: "",
  level: "",
  corpus: "",
  state: "",
  tags: [],
  tagsInherited: [],
  dates: {},
  logbook: {},
  properties: {},
};
// Parser state flags, updated line by line.
let isHeader = false;
let isProperty = false;
let isLogbook = false;
let isFirst = true;
// Init the first task object as an empty clone. The spread copy is shallow,
// so the nested containers are re-created to keep the template untouched.
let currentTask = {
  ...task,
  tags: [],
  tagsInherited: [],
  dates: {},
  logbook: {},
  properties: {},
};
/**
 * Append the current task to the export list, then start a new empty task.
 *
 * The new task copies the scalar defaults of the `task` template but gets its
 * own arrays and objects: a plain shallow copy would make all tasks share the
 * same `tags`, `tagsInherited`, `logbook` and `properties` containers.
 */
function addAndRefreshCurrentTask() {
  tasksObjectsForJsonExport.push(currentTask);
  currentTask = {
    ...task,
    tags: [],
    tagsInherited: [],
    dates: {},
    logbook: {},
    properties: {},
  };
}
/**
 * Count every word of a sentence into `statistics.words`.
 *
 * Words are separated by any run of whitespace, so repeated spaces do not
 * produce an empty "word".
 *
 * @param {string} sentence text to split into words
 */
function makeWordsStatistics(sentence) {
  const counters = statistics.words;
  for (const word of String(sentence ?? '').split(/\s+/)) {
    if (word === '') {
      continue;
    }
    // Own-property check: words such as "constructor" or "toString" would
    // otherwise read an inherited function and turn the counter into NaN.
    if (!Object.prototype.hasOwnProperty.call(counters, word)) {
      counters[word] = 0;
    }
    counters[word]++;
  }
}
2023-03-05 23:13:16 +01:00
/**********************
 * loop to parse all
 *********************/
// Read the whole source file, turn each header into a task object, collect
// dates, tags and body text, then optionally export everything as JSON.
fs.readFile(sourceFilePath, 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }

  // true when the line contains at least one keyword of the list
  const lineContainsAny = (line, keywords) => keywords.some((keyword) => line.indexOf(keyword) !== -1);
  // increment a counter stored under an own property of `counters`
  const increment = (counters, key) => {
    if (!Object.prototype.hasOwnProperty.call(counters, key)) {
      counters[key] = 0;
    }
    counters[key]++;
  };

  console.log(" parsing...")
  // walk through every line of the org file (CRLF line endings are tolerated)
  let everyline = data.split(/\r?\n/);
  everyline.forEach((line) => {
    // a header is a line starting with one or more stars followed by a space
    const headerStars = line.match(/^\*+ /);
    if (headerStars) {
      // a new header closes the previous task, except for the very first header
      if (!isFirst) {
        addAndRefreshCurrentTask();
      } else {
        isFirst = false;
      }
      isHeader = true;
      // the level is the number of LEADING stars; stars inside the title do not count
      currentTask.level = headerStars[0].length - 1;

      const cleanedHeader = cleanHeader(line);
      headers.push(cleanedHeader)
      currentTask.header = cleanedHeader;
      makeWordsStatistics(cleanedHeader);

      // every keyword is probed: lineHasKeyword also records the header by kind
      stateKeywordList.forEach(keyword => {
        if (lineHasKeyword(line, keyword)) {
          currentTask.state = keyword
        }
      })

      // find the tags: everything between the first and the last colon of the header
      let tagsFound = line.match(/\:(.*)\:/g)
      if (tagsFound) {
        tagsFound[0].split(':').forEach(tag => {
          if (tag.length > 1) {
            increment(statistics.tags, tag);
            currentTask.tags.push(tag)
          }
        })
      }
      // ------------- end of the searches inside the header line -------------
    } else {
      isHeader = false;
    }

    // body lines: classify creation, closing and logbook dates
    let dateFound = searchDate(line)
    if (dateFound) {
      dateKeywordList.forEach(keyword => {
        if (lineHasSubstring(line, keyword)) {
          // NOTE(review): Date parsing of the matched text is engine-dependent,
          // notably when it contains a weekday abbreviation - confirm the output.
          currentTask.dates[keyword] = new Date(dateFound[0]);
        }
      })
    } else if (
      !isHeader &&
      line.length &&
      !lineContainsAny(line, dateKeywordList) &&
      !lineContainsAny(line, stateKeywordList) &&
      !lineContainsAny(line, sectionKeywordList)
    ) {
      // Plain body text. The lists must be probed keyword by keyword:
      // String.prototype.indexOf(array) searches for the comma-joined list
      // and never matches, which left the corpus permanently empty.
      makeWordsStatistics(line)
      // append the line to the body of the section following the header
      let cleanedLine = line.replace(/ {2,}/g, ' ')
      currentTask.corpus += `${cleanedLine}\n`
    }
  })
  // add the last parsed task
  addAndRefreshCurrentTask();

  console.log(" parsing fini")

  // Entries of `statistics` that are not plain counters (the tags and words
  // maps) rank as 0 so the comparator stays well defined.
  const counterValue = (key) => (typeof statistics[key] === 'number' ? statistics[key] : 0);

  const jsonContent = {
    statistics: {
      lines_count: everyline.length,
      headers_count: headers.length,
      // names of the statistics entries, ordered by ascending count
      statistics: Object.keys(statistics).sort(function (a, b) {
        return counterValue(a) - counterValue(b)
      })
    },
    meta_data: {
      author: '@tykayn@mastodon.Cipherbliss.com',
      generated_at: new Date(),
      // sourceFilePath already ends with the file name
      generated_from_file: sourceFilePath,
      sources: 'https://forge.chapril.org/tykayn/org-report-stats.git'
    },
    tasks_list: tasksObjectsForJsonExport
  }
  console.log('statistics', statistics)

  if (writeJsonAfterParse) {
    writeJsonFile('export_' + sourceFileName + '_parsed.json', JSON.stringify(jsonContent));
  }
})
2023-03-05 23:13:16 +01:00
/**
 * Tell whether a header line carries the given state keyword, i.e. the keyword
 * directly follows the leading stars as a whole word.
 *
 * When it does, the raw line is stored in `headersByKind[keyword]` and the
 * `statistics[keyword]` counter is incremented.
 *
 * @param {string} line header line to inspect
 * @param {string} keyword state keyword to look for
 * @returns {boolean} true when the header carries the keyword
 */
function lineHasKeyword(line, keyword = 'TODO') {
  // escape the keyword so it is matched literally inside the pattern
  const escapedKeyword = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // anchored at the stars and closed by a word boundary, so that neither
  // "* TODOLIST ..." nor a "* TODO" buried in the title count as a state
  const pattern = new RegExp('^\\*+ ' + escapedKeyword + '(?![A-Za-z0-9_])');
  const isFound = pattern.test(line);
  if (isFound) {
    createNewHeaderKind(keyword)
    headersByKind[keyword].push(line);
    if (!statistics[keyword]) {
      statistics[keyword] = 0
    }
    statistics[keyword]++
  }
  return isFound;
}
/**
 * Tell whether the line contains the keyword anywhere.
 *
 * The `statistics[keyword]` counter is incremented only when the keyword is
 * actually present, so it reflects occurrences rather than lookups.
 *
 * @param {string} line line to inspect
 * @param {string} keyword text to look for
 * @returns {boolean} true when the keyword occurs in the line
 */
function lineHasSubstring(line, keyword) {
  const isFound = line.indexOf(keyword) !== -1;
  if (isFound) {
    if (!statistics[keyword]) {
      statistics[keyword] = 0
    }
    statistics[keyword]++
  }
  return isFound
}
/**
 * Make sure `headersByKind` holds a list for the given keyword.
 * An existing list is left untouched.
 *
 * @param {string} keyword state keyword used as the grouping key
 */
function createNewHeaderKind(keyword) {
  // own-property check so that inherited names (e.g. "constructor") still get a list
  if (!Object.prototype.hasOwnProperty.call(headersByKind, keyword)) {
    headersByKind[keyword] = [];
  }
}
2023-03-05 23:13:16 +01:00
/**
* chercher des dates et heures au format
* YYYY-MM-DD HH:II:SS
*
* @param line
* @returns {*}
*/
function searchDate(line) {
// return line.match(/[(\d{4}\-\d{2}\-\d{2} ?\d{2}?\:?\d{2}?\:?\d{2}?)(\d{4}\-\d{2}\-\d{2})]/)
let simpleDay = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.?/)
let simpleDayHour = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}/)
let simpleDayHourSec = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}\:\d{2}/)
if (simpleDayHourSec) {
2023-03-05 23:13:16 +01:00
return simpleDayHourSec;
}
if (simpleDayHour) {
2023-03-05 23:13:16 +01:00
return simpleDayHour;
}
if (simpleDay) {
2023-03-05 23:13:16 +01:00
return simpleDay;
}
}
/**
 * Compare two dates and keep the oldest one, in order to find the first date
 * linked to a task among those mentioned.
 *
 * Missing (null/undefined) or unparsable values are ignored; when only one
 * date is usable, that one is returned.
 *
 * @param date1 any value accepted by moment()
 * @param date2 any value accepted by moment()
 * @returns the oldest date as a moment object, or undefined when neither value is a usable date
 */
function compareDatesAndKeepOldest(date1, date2) {
  // moment(undefined) means "now", so missing values are filtered out explicitly
  const candidates = [date1, date2]
    .filter((value) => value !== null && value !== undefined)
    .map((value) => moment(value))
    .filter((parsed) => parsed.isValid());

  if (candidates.length === 0) {
    return undefined;
  }
  if (candidates.length === 1) {
    return candidates[0];
  }
  const [first, second] = candidates;
  return second.isBefore(first) ? second : first;
}
2023-03-04 23:57:03 +01:00
/**
 * Get the cleaned title of a header line.
 *
 * Removes, in order: the leading stars, the leading state keyword (only when
 * it opens the title), every bracketed cookie such as "[#A]" or "[1/3]", and
 * the trailing tag block (":tag1:tag2:"). Whitespace runs are then collapsed.
 *
 * @param {string} line raw header line
 * @returns {string} the title text only
 */
function cleanHeader(line) {
  let title = String(line);
  // leading stars
  title = title.replace(/^\*+\s+/, '');
  // state keyword, only as the first whole word of the title
  for (const keyword of stateKeywordList) {
    const startsWithKeyword = title.startsWith(keyword) && !/\w/.test(title.charAt(keyword.length));
    if (startsWithKeyword) {
      title = title.slice(keyword.length);
      break;
    }
  }
  // each bracketed group on its own: a greedy match would also swallow the
  // text standing between two groups
  title = title.replace(/\[[^\]]*\]/g, '');
  // tag block at the end of the line; colons inside the title (e.g. "10:30") are kept
  title = title.replace(/(?:^|\s+):[^\s:]+(?::[^\s:]+)*:\s*$/, '');
  // collapse whitespace runs instead of deleting a single space between words
  return title.replace(/\s+/g, ' ').trim();
}
2023-03-05 11:05:06 +01:00
/**
 * Write the given content to `./output/<fileName>` as UTF-8 text.
 * The outcome is reported on the console once the write completes.
 *
 * @param {string} fileName name of the file to create inside ./output/
 * @param {string} fileContent text to write
 * @returns whatever fs.writeFile returns
 */
function writeJsonFile(fileName, fileContent) {
  console.log('write file ', fileName);

  const reportOutcome = (err) => {
    if (err) {
      console.log(`Error writing file: ${err}`);
      return;
    }
    console.log(`File ${fileName} is written successfully!`);
  };

  return fs.writeFile(`./output/${fileName}`, fileContent, "utf8", reportOutcome);
}