rework markdown processor

This commit is contained in:
tristan 2024-01-02 08:50:33 +00:00
parent 9421a0a910
commit d04a6cc679
4 changed files with 105 additions and 145 deletions

View file

@ -19,7 +19,6 @@ You can process your site with the power of nixpkgs - eg you could process image
## How ## How
Read `flake.nix` for basic usage. Read `flake.nix` for basic usage.
The default package will serve the site locally (using caddy) The default package will serve the site locally (using caddy)
```sh ```sh
@ -40,11 +39,11 @@ nix run .#watch
## features ## features
- [X] file paths - file paths
- [X] global styles - global styles
- [X] custom components with styles - custom components with styles
- [X] component extend / inheritance - component extend / inheritance
- [ ] templating - templating
### markdown ### markdown
@ -52,16 +51,20 @@ nix run .#watch
- [X] headers - [X] headers
- [X] paragraphs - [X] paragraphs
- [ ] tables - [ ] tables
- [X] lists - [X] unordered lists
- [ ] embedded lists - [ ] ordered lists
- [ ] nested lists
- [X] checkboxes - [X] checkboxes
- [X] [links](https://www.markdownguide.org/basic-syntax/#links)
- [ ] images - [ ] images
- [ ] links - [ ] block quotes
- [X] `codeblocks` - [X] `codeblocks are pretty neat`
- [X] ~superscript~ - [X] ~sub~script
- [X] ^subscript^ - [X] ^super^script
- [X] ==highlight== - [X] ==mark== my words
- [X] *italics* - [X] *italics* _emphasis_ inside*of*the words_with_underscores
- [X] **bold** - [X] **bold** __strong__ even**in**words but__dont__break
- [X] ~~strikethrough~~ - [X] ***bold italics!***
- [X] ~~strikethrough everything~~

View file

@ -54,10 +54,10 @@
]; ];
watch = let w = import ./testing/watch.nix pkgs; watch = let w = import ./testing/watch.nix pkgs;
in w "nix run .#test" "fx"; in w "nix run .#test --show-trace" "fx";
dev = let w = import ./testing/watch.nix pkgs; dev = let w = import ./testing/watch.nix pkgs;
in w "nix run .#" "caddy"; in w "nix run .# --show-trace" "caddy";
}; };
}; };
} }

View file

@ -1,17 +1,5 @@
let let
elems = import ./elems.nix; elems = import ./elems.nix;
html = import ./html.nix;
H = n:
let
v = if n < 1 then
builtins.trace "attempted to make heading size ${toString n} (min is 1)"
1
else if n > 6 then
builtins.trace "attempted to make heading size ${toString n} (max is 6)"
6
else
n;
in html.tag "h${toString v}" { };
in rec { in rec {
readMd = md: readMd = md:
if builtins.isPath md then if builtins.isPath md then
@ -19,9 +7,7 @@ in rec {
else else
processMd md; processMd md;
processMd = md: processMd = processStr;
(map (c: if builtins.isString c then mdBlock c else "")
(builtins.split "\n\n" md));
recReadMd = root: recReadMd = root:
assert builtins.isPath root; assert builtins.isPath root;
@ -46,25 +32,6 @@ in rec {
mdToPage = md: mdToPage = md:
elems.Doc { } [ [ (elems.title { } "markdown file") ] (readMd md) ]; elems.Doc { } [ [ (elems.title { } "markdown file") ] (readMd md) ];
mdBlock = block:
(let
h = heading block;
c = code block;
ul = list block;
in (if h.matched then
h.block
else if c.matched then
c.block
else if ul.matched then
ul.block
else
elems.p (processStr block)));
heading = block:
matchThen "(#+) (.*)" block (m:
let l = builtins.stringLength (builtins.elemAt m 0);
in H l (builtins.elemAt m 1));
code = block: code = block:
matchThen "(```)(.*)(```)" block (m: elems.code (builtins.elemAt m 1)); matchThen "(```)(.*)(```)" block (m: elems.code (builtins.elemAt m 1));
@ -95,16 +62,16 @@ in rec {
content content
]; ];
replace = matcher: apply: block: replace = regex: apply: block:
(let (let
m = builtins.match matcher block; m = builtins.match regex block;
before = let v = builtins.elemAt m 0; in if v == null then "" else v; before = let v = builtins.elemAt m 0; in if v == null then "" else v;
inner = builtins.elemAt m 1; after = toString( builtins.elemAt m (matchCount - 1));
after = builtins.elemAt m 2; matchCount = builtins.length m;
in if m == null then in if m == null then
block block
else else
(replace matcher apply before) + (apply inner) + after); (replace regex apply before) + (apply m) + after);
rule = matcher: apply: blocks: rule = matcher: apply: blocks:
map (b: if builtins.isString b then replace matcher apply b else b) blocks; map (b: if builtins.isString b then replace matcher apply b else b) blocks;
@ -118,19 +85,77 @@ in rec {
in assert i < len; in assert i < len;
if next < len then rule (applyRules next rules group) else rule group; if next < len then rule (applyRules next rules group) else rule group;
basicRule = matcher: elem: rule matcher (m: elem (builtins.elemAt m 1));
processStr = applyRules 0 [ processStr = applyRules 0 [
(rule (wrap "\\^") elems.sup) (basicRule (wrap "\\^") elems.sup)
(rule (wrap "~") elems.sub) (basicRule (wrap "~") elems.sub)
(rule (wrap "\\*") elems.em) (basicRule (wrap "\\*") elems.em)
(rule (wrap "`") elems.code) (basicRule (wrapBreak "_") elems.em)
(rule (wrap "==") elems.mark) (basicRule (wrap "`") elems.code)
(rule (wrap "~~") elems.del) (basicRule (wrap "==") elems.mark)
(rule (wrap "\\*\\*") elems.strong) (basicRule (wrap "~~") elems.del)
(rule (contains "<([-[:alnum:].%?&#=:/]+)>") (m: elems.a m m)) (basicRule (wrap "\\*\\*") elems.strong)
(basicRule (wrapBreak "__") elems.strong)
(rule (contains "\\[(.*)]\\((.*)\\)") (
m:
let
href = builtins.elemAt m 2;
text = builtins.elemAt m 1;
in
(elems.a href text)
))
(rule (''
(.*
)([^-][^
]+
)((- [^
]+
)+)(.*)'') (
l:
(elems.ul (basicRule (''
(.*
)?- ([^
]+)
(.*)'') (m:
elems.li (basicRule ("()\\[(.)](.*)") (check:
elems.input {
type = "checkbox";
checked = check != " ";
disabled = true;
}) [ m ])) [ (builtins.elemAt l 2) ]))
))
(basicRule ( "(.*\n\n)?(.+)\n(.*)?" ) elems.p)
(basicRule ("(.*\n\n)?```(.*)```(.*)?") (elems.textarea { readonly = true; }))
(basicRule (containsBreak ''
###### ([^
]+)'') (elems.h6))
(basicRule (containsBreak ''
##### ([^
]+)'') (elems.h5))
(basicRule (containsBreak ''
#### ([^
]+)'') (elems.h4))
(basicRule (containsBreak ''
### ([^
]+)'') (elems.h3))
(basicRule (containsBreak ''
## ([^
]+)'') (elems.h2))
(basicRule (containsBreak ''
# ([^
]+)'') (elems.h1))
(basicRule (containsBreak "<(${linkmatcher})>") (m: elems.a m m))
]; ];
contains = matcher: "(.* )?${matcher}(.*)"; linkmatcher = "[-[:alnum:].%?&#=:/]+";
wrap = matcher: contains "${matcher}(.+)${matcher}"; contains = matcher: "(.*)?${matcher}(.*)";
wrap = matcher: contains "${matcher}([^${matcher}]+)${matcher}";
containsBreak = matcher: "(.*[[:space:]])?${matcher}(.*)";
wrapBreak = matcher: containsBreak "${matcher}([^${matcher}]+)${matcher}";
matchThen = matcher: block: func: matchThen = matcher: block: func:
let m = builtins.match matcher block; let m = builtins.match matcher block;

View file

@ -4,30 +4,6 @@ let
it = import ./it.nix; it = import ./it.nix;
in with md; [ in with md; [
(it "gets md heading" {
actual = mdBlock "# title of the page";
expected = elems.h1 "title of the page";
})
(it "gets md heading 2" {
actual = mdBlock "## a subheading";
expected = elems.h2 "a subheading";
})
(it "limits to 6 #" {
actual = mdBlock "######## super ultra tiny heading";
expected = elems.h6 "super ultra tiny heading";
})
(it "makes a code block" (let
code = ''
this is my code
'';
in {
actual = mdBlock "```${code}```";
expected = elems.code code;
}))
(it "matches a list of one element" ({ (it "matches a list of one element" ({
actual = list '' actual = list ''
- something - something
@ -100,31 +76,11 @@ in with md; [
}; };
})) }))
(it "finds surrounded parts" ({ (it "processes whole string with all rules" ({
actual = replace (wrap "\\*\\*") elems.strong '' actual = processStr ''
this text **may** contain **bold** words inside it. this text **may** *or may not* contain **bold** words *inside* it.
''; '';
expected = [ expected = (elems.p [
"this text"
(elems.strong "may")
"contain"
(elems.strong "bold")
''
words inside it.
''
];
asString = true;
}))
(it "surrounds in list of elems" ({
actual = rule (wrap "\\*") elems.em [
"this text"
(elems.strong "may")
"*or may not* contain"
(elems.strong "bold")
"words *inside* it."
];
expected = [
"this text" "this text"
(elems.strong "may") (elems.strong "may")
(elems.em "or may not") (elems.em "or may not")
@ -133,42 +89,18 @@ in with md; [
"words" "words"
(elems.em "inside") (elems.em "inside")
"it." "it."
]; ]);
asString = true; asString = true;
})) }))
(it "processes whole string with all rules" ({ (it "makes paragraphs" {
actual = processStr ''
this text **may** *or may not* contain **bold** words *inside* it.
'';
expected = [
"this text"
(elems.strong "may")
(elems.em "or may not")
"contain"
(elems.strong "bold")
"words"
(elems.em "inside")
''
it.
''
];
asString = true;
}))
(it "processes md block" {
actual = readMd '' actual = readMd ''
# foo bar
lorem ipsum lorem ipsum
dolor sit
foo bar
''; '';
expected = [ expected = "<p >lorem ipsum\ndolor sit\n</p><p >foo bar</p>";
(elems.h1 { } "foo bar")
""
(elems.p { } ''
lorem ipsum
'')
];
asString = true; asString = true;
}) })