Obsidian with Local API and Firefox Extension
I will split this post into three sections:
- Obsidian Plugin (Covered in another article: https://webuxlab.com/en/projects/obsidian-plugin)
- Small Local API in NodeJS
- Firefox Extension
First Section - Obsidian Plugin
The goal of this plugin is to provide an endpoint that will receive data using JSON (encoded in base64) and save it in a page.
Please see : https://webuxlab.com/en/projects/obsidian-plugin
Second Section - The local API
I am familiar with puppeteer, so this is what I use all the time when it is time to do some crawling.
index.js
const express = require("express");
const app = express();
const processing = require("./processing");
const cors = require("cors");
const { open } = require("lmdb");
const Queue = require("better-queue");
const openBrowser = require("open");
// Work queue that throttles crawling so only a few headless browsers run at
// the same time. Every pushed URL is handed to `doStuff`.
const q = new Queue(doStuff, {
  concurrent: 2, // number of tasks processed in parallel
  maxRetries: 3, // retry attempts before a task is reported as failed
  retryDelay: 5000, // milliseconds to wait between retries
  maxTimeout: 60000, // milliseconds a single task may run before timing out
});

// Host reported in the startup log and port the HTTP server listens on.
const hostname = "127.0.0.1";
const port = 3000;

// Local LMDB store kept under "entries"; the binding is never reassigned,
// so it is declared with `const`.
const myDB = open({
  path: "entries",
  // any options go here, we can turn on compression like this:
  compression: true,
});

// Enable CORS for every route and parse JSON request bodies into `req.body`.
app.use(cors());
app.use(express.json());
/**
 * Queue worker: crawls one URL, stores the result and reports back to the queue.
 *
 * The queue only learns about the outcome through `cb`, so every failure is
 * forwarded to it instead of being left as a rejected promise (which would
 * leave the task pending until the queue timeout and surface as an unhandled
 * rejection).
 *
 * @param {string} input - URL taken from the queue.
 * @param {(err: Error|null, result?: {input: string, data: object}) => void} cb
 *   Completion callback supplied by the queue.
 * @returns {Promise<*>} Resolves with whatever `cb` returns.
 */
async function doStuff(input, cb) {
  let result;
  try {
    const data = await processing(input);
    // Fall back to a timestamp when the crawled page yields no `centrisNo`.
    const key = data.centrisNo || new Date().toISOString();
    await myDB.put(key, { ...data });
    result = { input, data };
  } catch (err) {
    return cb(err);
  }
  // Called outside the try block so an exception thrown by `cb` itself is
  // never reported to `cb` a second time.
  return cb(null, result);
}
// GET /status — answers with the current queue statistics; `info` is a
// placeholder that is always null.
app.get("/status", async (_request, response) => {
  const statusReport = { queueInfo: q.getStats(), info: null };
  response.send(statusReport);
});
/**
 * POST / — accepts `{ "datas": [url, ...] }` and queues every URL for crawling.
 *
 * Responds "Thank You!" as soon as the URLs are queued; crawling happens in
 * the background. When a task finishes, its result is handed to Obsidian
 * through an `obsidian://endpoint-local/` URI carrying the base64-encoded JSON.
 *
 * Responds 400 when `datas` is missing, not an array, or empty, and 500 on an
 * unexpected error. The body stays "Oops" in both cases.
 *
 * CORS headers (including preflight handling) come from the `cors()`
 * middleware registered on the app, so none are set here; a POST route never
 * receives an OPTIONS request.
 */
app.post("/", (req, res) => {
  try {
    const urls = req.body.datas;
    if (!Array.isArray(urls) || urls.length === 0) {
      console.error("No Url provided.");
      res.status(400).send("Oops");
      return;
    }

    for (const url of urls) {
      q.push(url)
        .on("finish", async function ({ input, data }) {
          console.log("Finished", input);
          // Crawl errors are kept out of the payload sent to Obsidian.
          delete data.errors;
          const payload = Buffer.from(JSON.stringify([data])).toString(
            "base64"
          );
          // NOTE(review): base64 may contain "+", "/" and "=" — confirm the
          // receiving plugin copes with them unescaped in the query string.
          try {
            await openBrowser(`obsidian://endpoint-local/?data=${payload}`, {
              background: true,
            });
          } catch (err) {
            // Without this, a failure to launch the URI handler would be an
            // unhandled promise rejection.
            console.error("Error:", err);
          }
        })
        .on("failed", function (err) {
          console.error("Error:", err);
        });
    }

    res.send("Thank You!");
  } catch (e) {
    console.error(e.message);
    console.error(e.stack);
    res.status(500).send("Oops");
  }
});
// Bind explicitly to `hostname` so the API is only reachable from this
// machine and the logged address matches the address actually served.
// Without the host argument the server accepts connections on every interface.
app.listen(port, hostname, () => {
  console.log(`Server running at http://${hostname}:${port}/`);
});
processing.js:
const puppeteer = require("puppeteer");
module.exports = async (url) => {
const browser = await puppeteer.launch({
headless: true,
timeout: 60000,
});
const page = await browser.newPage();
console.log(`Working on it... (${url})`);
const data = { errors: [], datetime: new Date() };
try {
page.setDefaultTimeout(2000);
page.setUserAgent("SET_YOUR_USER_AGENT");
await page.goto(url, { timeout: 20000 });
// Set screen size
await page.setViewport({ width: 1512, height: 2050 });
// TODO: Add your puppeteer commands and set the result in the data variable
data.url = url;
data.foo = "bar";
//...
} catch (e) {
console.log(e.stack);
data.errors.push(
`${url} - Element might not be found for - '${e.message}'`
);
} finally {
await browser.close();
return data;
}
};
package.json
{
"name": "api",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"author": "Studio Webux",
"license": "MIT",
"dependencies": {
"better-queue": "^3.8.12",
"cors": "^2.8.5",
"express": "^4.18.2",
"lmdb": "^2.7.9",
"open": "^8.4.2",
"puppeteer": "^19.6.3"
}
}
- I use `better-queue` to control the quantity of requests to process; my local computer is limited, so I need to easily control the concurrency of puppeteer. If you are familiar with AWS, this is similar to SQS.
- `cors` and `express` go together; they simply offer an API endpoint.
- `lmdb` is completely optional; I was wondering if a local NoSQL database was out there. For the small amount of tests I did with it… it works great! Lightweight and no setup, perfect for my small projects and experiments.
- `puppeteer` is the tool used to crawl the web page.
Third Section - Firefox extension
I’m far from an expert; I have built two of them so far.
I won’t cover all the details; I recommend that you read the official documentation. It is quite easy to understand.
background.js
/**
 * Sends a page URL to the collector API whose address is stored under the
 * `endpoint` key of the extension's synced storage.
 *
 * @param {string} [url=""] - URL to submit; sent as `{ "datas": [url] }`.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When storage cannot be read, no endpoint is configured, or
 *   the API answers with a non-2xx status.
 */
async function postData(url = "") {
  let data;
  try {
    data = await browser.storage.sync.get("endpoint");
  } catch (error) {
    console.error(error);
    // Rethrow the original error so its message and stack are preserved.
    throw error instanceof Error ? error : new Error(error);
  }
  console.log(data);

  // Without this guard `fetch(undefined)` would request the literal path
  // "undefined" instead of failing clearly.
  if (!data || !data.endpoint) {
    throw new Error("Missing endpoint in extension storage");
  }

  const response = await fetch(data.endpoint, {
    method: "POST",
    mode: "cors",
    cache: "no-cache",
    credentials: "omit",
    headers: { "Content-Type": "application/json" },
    redirect: "follow",
    referrerPolicy: "no-referrer",
    body: JSON.stringify({
      datas: [url],
    }),
  });

  // fetch only rejects on network errors; surface HTTP failures explicitly.
  if (!response.ok) {
    throw new Error(`Collector responded with HTTP ${response.status}`);
  }
  return response.text();
}
/**
 * runtime.onMessage listener: forwards the URL carried by a message to the
 * collector API.
 *
 * Always returns a promise, so the sender's `sendMessage()` promise settles
 * with the outcome: it resolves with `true` once the URL has been posted and
 * rejects when the message has no URL or posting fails.
 *
 * @param {{url?: string}} message - Message sent by the popup.
 * @returns {Promise<boolean>} Resolves with `true` on success.
 */
async function handleMessage(message) {
  if (!message || !message.url) {
    // Thrown inside an async function, so it reaches the sender as a
    // rejection instead of a synchronous exception in the listener.
    throw new Error("Missing an URL");
  }
  await postData(message.url);
  return true;
}
// Messages from the popup are handled by `handleMessage`.
browser.runtime.onMessage.addListener(handleMessage);

// Inject logic.js whenever a page whose URL has at least two path segments
// updates its history state. Passing `null` as the tab id lets the browser
// pick the tab (presumably the active one — confirm against the tabs API docs).
browser.webNavigation.onHistoryStateUpdated.addListener(
  function () {
    browser.tabs
      .executeScript(null, { file: "logic.js" })
      .catch(function (error) {
        // Injection can be refused (e.g. missing permission for the tab);
        // log it instead of leaving an unhandled rejection.
        console.error(error);
      });
  },
  {
    url: [{ originAndPathMatches: "^.+://.*/.+/.+$" }],
  }
);
logic.js
// Store the collector endpoint read by the background script. This runs every
// time the script is loaded, so the stored value is always reset to this one.
browser.storage.sync
  .set({
    endpoint: "http://localhost:3000",
  })
  .catch(function (e) {
    console.error(e);
  });

/**
 * Appends a Bootstrap alert paragraph to the page body.
 *
 * @param {string} text - Message to display (set as text, never as HTML).
 * @param {string} variant - Bootstrap alert variant, e.g. "success" or "danger".
 */
function showAlert(text, variant) {
  const notice = document.createElement("p");
  notice.textContent = text;
  notice.setAttribute("class", `alert alert-${variant} mt-3 mb-2`);
  document.body.appendChild(notice);
}

// Once the popup is parsed, make the "Fetch" button send the active tab's URL
// to the background script and report the outcome in the popup.
document.addEventListener("DOMContentLoaded", function () {
  document
    .getElementById("fetchBtn")
    .addEventListener("click", async function () {
      try {
        const tabs = await browser.tabs.query({
          currentWindow: true,
          active: true,
        });
        if (tabs.length === 0) {
          throw new Error("No active tab found");
        }
        await browser.runtime.sendMessage({ url: tabs[0].url });
        showAlert("Data sent to Collector with success", "success");
      } catch (e) {
        // A single catch covers both the tab lookup and the message, so no
        // failure goes unreported.
        showAlert(`An error has occured, ${e.message}`, "danger");
      }
    });
});
popup.html
<!DOCTYPE html>
<!-- Popup page referenced as "default_popup" in manifest.json. -->
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Crawl URL</title>
<!-- logic.js waits for DOMContentLoaded before attaching the click handler to #fetchBtn. -->
<script src="./logic.js"></script>
<!-- Bootstrap 4.6.0 stylesheet from the jsDelivr CDN, pinned with an integrity hash. -->
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/css/bootstrap.min.css"
integrity="sha384-B0vP5xmATw1+K9KRQjQERJvTumQW0nPEzvF6L/Z6nronJ3oUOFUFpCjEUQouq2+l"
crossorigin="anonymous"
/>
</head>
<body class="p-3">
<!-- Clicking this button sends the active tab's URL to the background script. -->
<button id="fetchBtn" class="btn btn-secondary">Fetch</button>
</body>
</html>
manifest.json
{
"manifest_version": 2,
"name": "TODO",
"version": "1.0",
"browser_specific_settings": {
"gecko": {
"id": "TODO",
"strict_min_version": "93.0"
}
},
"description": "TODO",
"icons": {
"128": "icons/logoAmpoule_142.png"
},
"browser_action": {
"default_popup": "popup.html",
"browser_style": true
},
"permissions": ["tabs", "webNavigation", "storage"],
"background": {
"scripts": ["background.js"]
}
}
Conclusion
It is a POC to validate the infinite power of Obsidian!
I am more than happy with what is possible with this editor; we can create and adapt a bunch of flows together to extend it to fit our custom needs.