From d04a6cc679db8862b58f6db0822b4ffeb2377483 Mon Sep 17 00:00:00 2001 From: tristan Date: Tue, 2 Jan 2024 08:50:33 +0000 Subject: [PATCH] rework markdown processor --- README.md | 35 +++++++------ flake.nix | 4 +- nixite/md.nix | 123 ++++++++++++++++++++++++++------------------ testing/md.test.nix | 88 ++++--------------------------- 4 files changed, 105 insertions(+), 145 deletions(-) diff --git a/README.md b/README.md index 282dc97..dfa5899 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ You can process your site with the power of nixpkgs - eg you could process image ## How Read `flake.nix` for basic usage. - The default package will serve the site locally (using caddy) ```sh @@ -40,11 +39,11 @@ nix run .#watch ## features -- [X] file paths -- [X] global styles -- [X] custom components with styles -- [X] component extend / inheritance -- [ ] templating +- file paths +- global styles +- custom components with styles +- component extend / inheritance +- templating ### markdown @@ -52,16 +51,20 @@ nix run .#watch - [X] headers - [X] paragraphs - [ ] tables -- [X] lists -- [ ] embedded lists +- [X] unordered lists +- [ ] ordered lists +- [ ] nested lists - [X] checkboxes +- [X] [links](https://www.markdownguide.org/basic-syntax/#links) - [ ] images -- [ ] links -- [X] `codeblocks` -- [X] ~superscript~ -- [X] ^subscript^ -- [X] ==highlight== -- [X] *italics* -- [X] **bold** -- [X] ~~strikethrough~~ +- [ ] block quotes +- [X] `codeblocks are pretty neat` +- [X] ~sub~script +- [X] ^super^script +- [X] ==mark== my words +- [X] *italics* _emphasis_ inside*of*the words_with_underscores +- [X] **bold** __strong__ even**in**words but__dont__break +- [X] ***bold italics!*** +- [X] ~~strikethrough everything~~ + diff --git a/flake.nix b/flake.nix index deeeb97..05996ca 100644 --- a/flake.nix +++ b/flake.nix @@ -54,10 +54,10 @@ ]; watch = let w = import ./testing/watch.nix pkgs; - in w "nix run .#test" "fx"; + in w "nix run .#test --show-trace" "fx"; dev = let w = 
import ./testing/watch.nix pkgs; - in w "nix run .#" "caddy"; + in w "nix run .# --show-trace" "caddy"; }; }; } diff --git a/nixite/md.nix b/nixite/md.nix index b36be1c..7aa0e36 100644 --- a/nixite/md.nix +++ b/nixite/md.nix @@ -1,17 +1,5 @@ let elems = import ./elems.nix; - html = import ./html.nix; - H = n: - let - v = if n < 1 then - builtins.trace "attempted to make heading size ${toString n} (min is 1)" - 1 - else if n > 6 then - builtins.trace "attempted to make heading size ${toString n} (max is 6)" - 6 - else - n; - in html.tag "h${toString v}" { }; in rec { readMd = md: if builtins.isPath md then @@ -19,9 +7,7 @@ in rec { else processMd md; - processMd = md: - (map (c: if builtins.isString c then mdBlock c else "") - (builtins.split "\n\n" md)); + processMd = processStr; recReadMd = root: assert builtins.isPath root; @@ -46,25 +32,6 @@ in rec { mdToPage = md: elems.Doc { } [ [ (elems.title { } "markdown file") ] (readMd md) ]; - mdBlock = block: - (let - h = heading block; - c = code block; - ul = list block; - in (if h.matched then - h.block - else if c.matched then - c.block - else if ul.matched then - ul.block - else - elems.p (processStr block))); - - heading = block: - matchThen "(#+) (.*)" block (m: - let l = builtins.stringLength (builtins.elemAt m 0); - in H l (builtins.elemAt m 1)); - code = block: matchThen "(```)(.*)(```)" block (m: elems.code (builtins.elemAt m 1)); @@ -95,16 +62,16 @@ in rec { content ]; - replace = matcher: apply: block: + replace = regex: apply: block: (let - m = builtins.match matcher block; + m = builtins.match regex block; before = let v = builtins.elemAt m 0; in if v == null then "" else v; - inner = builtins.elemAt m 1; - after = builtins.elemAt m 2; + after = toString( builtins.elemAt m (matchCount - 1)); + matchCount = builtins.length m; in if m == null then block else - (replace matcher apply before) + (apply inner) + after); + (replace regex apply before) + (apply m) + after); rule = matcher: apply: blocks: map (b: 
if builtins.isString b then replace matcher apply b else b) blocks; @@ -118,19 +85,77 @@ in rec { in assert i < len; if next < len then rule (applyRules next rules group) else rule group; + basicRule = matcher: elem: rule matcher (m: elem (builtins.elemAt m 1)); + processStr = applyRules 0 [ - (rule (wrap "\\^") elems.sup) - (rule (wrap "~") elems.sub) - (rule (wrap "\\*") elems.em) - (rule (wrap "`") elems.code) - (rule (wrap "==") elems.mark) - (rule (wrap "~~") elems.del) - (rule (wrap "\\*\\*") elems.strong) - (rule (contains "<([-[:alnum:].%?&#=:/]+)>") (m: elems.a m m)) + (basicRule (wrap "\\^") elems.sup) + (basicRule (wrap "~") elems.sub) + (basicRule (wrap "\\*") elems.em) + (basicRule (wrapBreak "_") elems.em) + (basicRule (wrap "`") elems.code) + (basicRule (wrap "==") elems.mark) + (basicRule (wrap "~~") elems.del) + (basicRule (wrap "\\*\\*") elems.strong) + (basicRule (wrapBreak "__") elems.strong) + (rule (contains "\\[(.*)]\\((.*)\\)") ( + m: + let + href = builtins.elemAt m 2; + text = builtins.elemAt m 1; + in + (elems.a href text) + )) + (rule ('' + (.* + )([^-][^ + ]+ + )((- [^ + ]+ + )+)(.*)'') ( + + l: + (elems.ul (basicRule ('' + (.* + )?- ([^ + ]+) + (.*)'') (m: + elems.li (basicRule ("()\\[(.)](.*)") (check: + elems.input { + type = "checkbox"; + checked = check != " "; + disabled = true; + }) [ m ])) [ (builtins.elemAt l 2) ])) + + )) + (basicRule ( "(.*\n\n)?(.+)\n(.*)?" 
) elems.p) + (basicRule ("(.*\n\n)?```(.*)```(.*)?") (elems.textarea { readonly = true; })) + (basicRule (containsBreak '' + ###### ([^ + ]+)'') (elems.h6)) + (basicRule (containsBreak '' + ##### ([^ + ]+)'') (elems.h5)) + (basicRule (containsBreak '' + #### ([^ + ]+)'') (elems.h4)) + (basicRule (containsBreak '' + ### ([^ + ]+)'') (elems.h3)) + (basicRule (containsBreak '' + ## ([^ + ]+)'') (elems.h2)) + (basicRule (containsBreak '' + # ([^ + ]+)'') (elems.h1)) + (basicRule (containsBreak "<(${linkmatcher})>") (m: elems.a m m)) ]; - contains = matcher: "(.* )?${matcher}(.*)"; - wrap = matcher: contains "${matcher}(.+)${matcher}"; + linkmatcher = "[-[:alnum:].%?&#=:/]+"; + contains = matcher: "(.*)?${matcher}(.*)"; + wrap = matcher: contains "${matcher}([^${matcher}]+)${matcher}"; + + containsBreak = matcher: "(.*[[:space:]])?${matcher}(.*)"; + wrapBreak = matcher: containsBreak "${matcher}([^${matcher}]+)${matcher}"; matchThen = matcher: block: func: let m = builtins.match matcher block; diff --git a/testing/md.test.nix b/testing/md.test.nix index c2b58e7..a358cf9 100644 --- a/testing/md.test.nix +++ b/testing/md.test.nix @@ -4,30 +4,6 @@ let it = import ./it.nix; in with md; [ - (it "gets md heading" { - actual = mdBlock "# title of the page"; - expected = elems.h1 "title of the page"; - }) - - (it "gets md heading 2" { - actual = mdBlock "## a subheading"; - expected = elems.h2 "a subheading"; - }) - - (it "limits to 6 #" { - actual = mdBlock "######## super ultra tiny heading"; - expected = elems.h6 "super ultra tiny heading"; - }) - - (it "makes a code block" (let - code = '' - this is my code - ''; - in { - actual = mdBlock "```${code}```"; - expected = elems.code code; - })) - (it "matches a list of one element" ({ actual = list '' - something @@ -100,31 +76,11 @@ in with md; [ }; })) - (it "finds surrounded parts" ({ - actual = replace (wrap "\\*\\*") elems.strong '' - this text **may** contain **bold** words inside it. 
+ (it "processes whole string with all rules" ({ + actual = processStr '' + this text **may** *or may not* contain **bold** words *inside* it. ''; - expected = [ - "this text" - (elems.strong "may") - "contain" - (elems.strong "bold") - '' - words inside it. - '' - ]; - asString = true; - })) - - (it "surrounds in list of elems" ({ - actual = rule (wrap "\\*") elems.em [ - "this text" - (elems.strong "may") - "*or may not* contain" - (elems.strong "bold") - "words *inside* it." - ]; - expected = [ + expected = (elems.p [ "this text" (elems.strong "may") (elems.em "or may not") @@ -133,42 +89,18 @@ in with md; [ "words" (elems.em "inside") "it." - ]; + ]); asString = true; })) - (it "processes whole string with all rules" ({ - actual = processStr '' - this text **may** *or may not* contain **bold** words *inside* it. - ''; - expected = [ - "this text" - (elems.strong "may") - (elems.em "or may not") - "contain" - (elems.strong "bold") - "words" - (elems.em "inside") - '' - it. - '' - ]; - asString = true; - })) - - (it "processes md block" { + (it "makes paragraphs" { actual = readMd '' - # foo bar - lorem ipsum + dolor sit + + foo bar ''; - expected = [ - (elems.h1 { } "foo bar") - "" - (elems.p { } '' - lorem ipsum - '') - ]; + expected = "

<p>lorem ipsum\ndolor sit\n</p><p>foo bar</p>

"; asString = true; })