Webux Lab

By Studio Webux

Obsidian Local API + Firefox

TG
Tommy Gingras Studio Webux 2023-03-15

Obsidian with Local API and Firefox Extension

I will split this post into three sections:

  1. Obsidian Plugin (Covered in another article: https://webuxlab.com/en/projects/obsidian-plugin)
  2. Small Local API in NodeJS
  3. Firefox Extension

First Section - Obsidian Plugin

The goal of this plugin is to provide an endpoint that will receive data using JSON (encoded in base64) and save it in a page.

Please see : https://webuxlab.com/en/projects/obsidian-plugin

Second Section - The local API

I am familiar with puppeteer, so this is what I use all the time when it is time to do some crawling.

index.js

const express = require("express");
const app = express();
const processing = require("./processing");
const cors = require("cors");
const { open } = require("lmdb");
const Queue = require("better-queue");
const openBrowser = require("open");

// Address the API announces in its start-up log line, and the port it listens on.
const hostname = "127.0.0.1";
const port = 3000;

// better-queue settings: two tasks at a time, up to three retries spaced
// 5 s apart, and a 60 s ceiling per task (values are in milliseconds).
const queueOptions = {
  concurrent: 2,
  maxRetries: 3,
  retryDelay: 5000,
  maxTimeout: 60000,
};

// Every pushed item is handed to doStuff (a hoisted function declaration).
const q = new Queue(doStuff, queueOptions);

// LMDB store kept in the "entries" path, with compression turned on.
const storeOptions = {
  path: "entries",
  compression: true,
};
let myDB = open(storeOptions);

// Middleware: CORS handling first, then JSON body parsing.
app.use(cors());
app.use(express.json());

/**
 * Queue worker: crawl one URL, persist the result, then report back to the queue.
 *
 * The queue only learns the outcome of a task through `cb`, so every failure
 * path must call it; otherwise the task would sit there until the queue's
 * timeout fires and the rejection would go unhandled.
 *
 * @param {string} input - The URL pushed onto the queue.
 * @param {Function} cb - Node-style callback: `cb(err)` on failure,
 *   `cb(null, { input, data })` on success.
 * @returns {Promise<*>} Resolves with whatever `cb` returns.
 */
async function doStuff(input, cb) {
  let data;
  try {
    data = await processing(input);

    // Use the crawled record's own identifier when it has one, otherwise
    // fall back to a timestamp so the entry is still stored.
    const key = data.centrisNo || new Date().toISOString();
    await myDB.put(key, { ...data });
  } catch (err) {
    return cb(err);
  }

  // Called outside the try block so an exception thrown by `cb` itself is
  // not mistaken for a processing failure (which would invoke `cb` twice).
  return cb(null, { input, data });
}

/**
 * GET /status
 * Replies with the queue statistics under `queueInfo`; `info` is always null.
 */
app.get("/status", async (req, res) => {
  const queueInfo = q.getStats();
  res.send({ queueInfo, info: null });
});

/**
 * POST /
 * Expects a JSON body shaped like `{ "datas": ["https://…", …] }` and pushes
 * one crawl job per URL onto the queue. The response is sent as soon as the
 * jobs are queued; it does not wait for them to finish.
 *
 * Responses:
 *   200 "Thank You!" - every URL was queued
 *   400 "Oops"       - `datas` is missing, not an array, or empty
 *   500 "Oops"       - unexpected failure while queuing
 *
 * CORS, including the OPTIONS preflight, is left to the cors() middleware
 * registered on the app: an OPTIONS request can never reach an app.post()
 * handler, so handling it here would be dead code.
 */
app.post("/", (req, res) => {
  const urls = req.body && req.body.datas;
  if (!Array.isArray(urls) || urls.length === 0) {
    console.error("No Url provided.");
    return res.status(400).send("Oops");
  }

  try {
    urls.forEach((url) => {
      q.push(url)
        .on("finish", async ({ input, data }) => {
          console.log("Finished", input);

          // The error list is not forwarded to Obsidian.
          delete data.errors;

          // The payload is a JSON array holding the single record, base64-encoded.
          // NOTE(review): the base64 text is not URI-encoded; confirm the
          // Obsidian plugin copes with '+', '/' and '=' in the query string.
          const payload = Buffer.from(JSON.stringify([data])).toString(
            "base64"
          );
          try {
            await openBrowser(`obsidian://endpoint-local/?data=${payload}`, {
              background: true,
            });
          } catch (err) {
            // Without this, a failure to open the URI would surface as an
            // unhandled rejection from the event handler.
            console.error("Error:", err);
          }
        })
        .on("failed", (err) => {
          console.error("Error:", err);
        });
    });

    return res.send("Thank You!");
  } catch (e) {
    console.error(e.message);
    console.error(e.stack);
    return res.status(500).send("Oops");
  }
});

// Bind explicitly to `hostname` so the server is only reachable on the
// address announced in the log line; without the host argument the server
// would accept connections on every network interface.
app.listen(port, hostname, () => {
  console.log(`Server running at http://${hostname}:${port}/`);
});

processing.js:

const puppeteer = require("puppeteer");

module.exports = async (url) => {
  const browser = await puppeteer.launch({
    headless: true,
    timeout: 60000,
  });
  const page = await browser.newPage();

  console.log(`Working on it... (${url})`);
  const data = { errors: [], datetime: new Date() };
  try {
    page.setDefaultTimeout(2000);
    page.setUserAgent("SET_YOUR_USER_AGENT");

    await page.goto(url, { timeout: 20000 });

    // Set screen size
    await page.setViewport({ width: 1512, height: 2050 });

    // TODO: Add your puppeteer commands and set the result in the data variable

    data.url = url;
    data.foo = "bar";
    //...
  } catch (e) {
    console.log(e.stack);
    data.errors.push(
      `${url} - Element might not be found for - '${e.message}'`
    );
  } finally {
    await browser.close();
    return data;
  }
};

package.json

{
  "name": "api",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "start": "node index.js"
  },
  "author": "Studio Webux",
  "license": "MIT",
  "dependencies": {
    "better-queue": "^3.8.12",
    "cors": "^2.8.5",
    "express": "^4.18.2",
    "lmdb": "^2.7.9",
    "open": "^8.4.2",
    "puppeteer": "^19.6.3"
  }
}

Third Section - Firefox extension

I’m far from an expert; I have only built two extensions so far.

I won’t cover every detail, so I recommend that you read the official documentation. It is quite easy to understand.

background.js

/**
 * Send a URL to the collector API whose address is stored under the
 * `endpoint` key of the extension's synced storage.
 *
 * @param {string} [url=""] - The URL to hand over; sent as `{ datas: [url] }`.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When storage cannot be read, no endpoint is configured,
 *   the request fails, or the API answers with a non-2xx status.
 */
async function postData(url = "") {
  let data;
  try {
    data = await browser.storage.sync.get("endpoint");
  } catch (error) {
    console.error(error);
    // Re-throw the original error so its message and stack are preserved.
    throw error instanceof Error ? error : new Error(String(error));
  }

  console.log(data);
  if (!data || !data.endpoint) {
    // fetch(undefined) would otherwise request the literal path "undefined".
    throw new Error("No endpoint configured in storage");
  }

  const response = await fetch(data.endpoint, {
    method: "POST",
    mode: "cors",
    cache: "no-cache",
    credentials: "omit",
    headers: { "Content-Type": "application/json" },
    redirect: "follow",
    referrerPolicy: "no-referrer",
    body: JSON.stringify({
      datas: [url],
    }),
  });

  // fetch only rejects on network failures; an HTTP error status has to be
  // checked explicitly or it would be reported as a success.
  if (!response.ok) {
    throw new Error(`Collector responded with HTTP ${response.status}`);
  }
  return response.text();
}

/**
 * runtime.onMessage listener: forwards `message.url` to the collector.
 *
 * Declared `async` so that every outcome, including a missing URL, is
 * delivered to the sender as a settled promise rather than as an exception
 * thrown synchronously inside the listener.
 *
 * @param {{url?: string}} message - Message sent by the popup.
 * @returns {Promise<boolean>} Resolves to `true` once the URL was posted.
 * @throws {Error} (as a rejection) "Missing an URL" when no URL is supplied,
 *   or whatever error postData rejects with.
 */
async function handleMessage(message) {
  if (!message || !message.url) {
    throw new Error("Missing an URL");
  }

  await postData(message.url);
  return true;
}

// Messages sent through runtime.sendMessage are answered by handleMessage.
browser.runtime.onMessage.addListener(handleMessage);

// Inject logic.js into the current tab whenever a page changes its URL
// through the History API, limited to URLs matching the regular expression
// below (an origin followed by at least two path segments).
// NOTE(review): logic.js is also the popup's script and looks up #fetchBtn;
// confirm that injecting it into web pages is really intended.
browser.webNavigation.onHistoryStateUpdated.addListener(
  function () {
    browser.tabs
      .executeScript(null, { file: "logic.js" })
      .catch(function (error) {
        // executeScript returns a promise; report a failed injection
        // instead of leaving an unhandled rejection behind.
        console.error("Unable to inject logic.js:", error);
      });
  },
  {
    url: [{ originAndPathMatches: "^.+://.*/.+/.+$" }],
  }
);

logic.js

// Store the collector address that the background script reads back.
// A failed write is logged rather than left as an unhandled rejection.
browser.storage.sync
  .set({
    endpoint: "http://localhost:3000",
  })
  .catch(function (e) {
    console.error(e);
  });

/**
 * Append a Bootstrap alert paragraph to the popup body.
 *
 * @param {string} text - Message to display (assigned through textContent).
 * @param {string} variant - Bootstrap alert class, e.g. "alert-success".
 */
function showAlert(text, variant) {
  const alertBox = document.createElement("p");
  alertBox.textContent = text;
  alertBox.setAttribute("class", `alert ${variant} mt-3 mb-2`);
  document.body.appendChild(alertBox);
}

// When "Fetch" is clicked, send the active tab's URL to the background script
// and report the outcome in the popup. One try/catch covers every step, so a
// failing tabs.query or an empty tab list is shown to the user as well.
document.addEventListener("DOMContentLoaded", function () {
  document
    .getElementById("fetchBtn")
    .addEventListener("click", async function () {
      try {
        const tabs = await browser.tabs.query({
          currentWindow: true,
          active: true,
        });
        if (tabs.length === 0) {
          throw new Error("No active tab found");
        }

        await browser.runtime.sendMessage({ url: tabs[0].url });
        showAlert("Data sent to Collector with success", "alert-success");
      } catch (e) {
        showAlert("An error has occured, " + e.message, "alert-danger");
      }
    });
});

popup.html

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Crawl URL</title>
    <!-- Popup behaviour. The script is loaded from the head; it waits for
         DOMContentLoaded before looking up the #fetchBtn button below. -->
    <script src="./logic.js"></script>
    <!-- Bootstrap 4.6.0 stylesheet from the jsDelivr CDN, pinned with a
         Subresource Integrity hash. -->
    <link
      rel="stylesheet"
      href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/css/bootstrap.min.css"
      integrity="sha384-B0vP5xmATw1+K9KRQjQERJvTumQW0nPEzvF6L/Z6nronJ3oUOFUFpCjEUQouq2+l"
      crossorigin="anonymous"
    />
  </head>
  <body class="p-3">
    <!-- The only control of the popup; logic.js attaches the click handler. -->
    <button id="fetchBtn" class="btn btn-secondary">Fetch</button>
  </body>
</html>

manifest.json

{
  "manifest_version": 2,
  "name": "TODO",
  "version": "1.0",
  "browser_specific_settings": {
    "gecko": {
      "id": "TODO",
      "strict_min_version": "93.0"
    }
  },
  "description": "TODO",
  "icons": {
    "128": "icons/logoAmpoule_142.png"
  },
  "browser_action": {
    "default_popup": "popup.html",
    "browser_style": true
  },
  "permissions": ["tabs", "webNavigation", "storage"],
  "background": {
    "scripts": ["background.js"]
  }
}

Conclusion

It is a proof of concept (POC) to validate the infinite power of Obsidian!

I am more than happy with what is possible with this editor: we can create and combine a bunch of flows to extend it to fit our custom needs.


Search