$ set swdiary-201{8,9}.org; (sed 4q $1; grep '^[#*$][ +]' $1) | sed 's/2018/2019/' | tee $2
It's long overdue here. After some experiments with markdown: it's an editing metaphor too far. Markdown is now for the purely literary uses. And there's still TiddlyWiki here.
So, the recovery process begins by capturing the dynalist bookmarks. I'm storing them locally here. "The experiment was a success!" is my shorthand for saying "we won't try that again". I realized my trepidation about using Firefox bookmarks is that I still don't understand what "sync" does. It's not exactly like Dropbox, so, … how about if I export my bookmarks here.
webmarks ()
{
: Web Bookmarks;
${*:-echo} $(home)/lib/bookmarks.html
}
iconless ()
{
: strip the ICON tag from BOOKMARK URLs;
sed 's/ICON="data:image.*"//' $*
}
Here's a little tour thru date formats and handling with the translit program tr. This is done to open Garrison Keillor's Writers Almanac for the current date. The function as written reports today's web URL to the caller.
writers_almanac ()
{
: GKs daily Writers Almanac;
: date: 2019-04-04;
: date: 2019-04-09 handle single-character day number;
local mon_d_year=$(date "+%B %e %Y" | tr -s 'A-Z ' a-z- );
: app_trace mon_d_year $mon_d_year;
${*:-echo} http://www.garrisonkeillor.com/radio/twa-the-writers-almanac-for-$mon_d_year/
}
$ writers_almanac
http://www.garrisonkeillor.com/radio/twa-the-writers-almanac-for-april-9-2019/
$ ... OR
$ writers_almanac open # opens the URL in your default browser
Today's added feature, to /handle single-character day number/s, needed experiments with the date and tr commands. The mistake I was making: there is no argument to the date +%X idiom which produces a single letter. The `e` argument produces a two-character result, with a single-digit day preceded by a space. It's then necessary to fold the month into lower case and collapse the blanks into single dashes. The tr command does this in one easy use: transliterate upper case to lower, map blanks to dashes, and "squeeze" each run of dashes into one.
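For instance, reconstructing the intermediate steps (not captured output, assuming the entry's date of April 9):
$ date "+%B %e %Y"
April  9 2019
$ date "+%B %e %Y" | tr -s 'A-Z ' a-z-
april-9-2019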
The function also exhibits what I'm calling a "smart" function. A smart function's usual default operation returns the object of interest. Often another command or function may perform a useful operation on the name.
How to collect functions, or any set of names? Using smart functions, the default action displays the names. The function collect_namesEG demonstrates the routine usage. Once the function is created and stored in an appropriate library, it needn't be updated unless the generating function is changed. There's an idea there, isn't there?
collect_names ()
{
: MFG a SMART _COLLECTION;
: rather than assume they are FUNCTIONS, just return NAMES;
: date: 2019-04-12;
local mfg=" : mfg: $(myname)";
eval "${1}_collection () { $mfg; \${*:-echo} ${*:2} \$(myname); }"
}
collect_namesEG ()
{
: demonstrate collect_names;
: date: 2019-04-12;
collect_names graf "fun_{words,callgraf,alltype} app_fun{uses,call}"
}
collections ()
{
: just the root names of the collections;
sfg _collection | sed 's/_.*//'
}
function_collection ()
{
: mfg: collect_names;
${*:-echo} collect_names collect_namesEG graf_names $(myname)
}
graf_collection ()
{
: mfg: collect_names;
${*:-echo} fun_{words,callgraf,alltype} app_fun{uses,call} $(myname)
}
$ collect_names function collect_names{,EG} graf_names
Notice that function_collection was also manufactured, with the example in the last line. Use of graf was the only working example at the time. The collections function lists the collections.
Remember, with smart functions, the default is overridden with an argument. To display the graf functions, simply:
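$ graf_collection declare -f    # presumably; any command may stand in for the default echo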
Some functions to write iso dates, equipped with a test case:
iso_YEARmoDY ()
{
: ISO Date -- [[[ YEAR ] MO] DA], default TODAY
: to serve the question: what functions were updated on what date;
case $# in
0)
date +%Y-%m-%d;;
1)
date +%Y-%m-$(_two_digit $1);;
2)
date +%Y-$(_two_digit $1)-$(_two_digit $2);;
3)
echo $(_four_digit $1)-$(_two_digit $2)-$(_two_digit $3);;
*)
$(myname) $1 $2 $3;;
esac
}
_century () {
: default years 1941 ... 2040
[[ $1 -gt 1582 ]] && { echo $1; return; }
[[ $1 -lt 100 ]] && {
[[ $1 -gt 40 ]] && { echo 19${1}; return; }
echo 20${1}; return
}
$(myname) $(_two_digit $1)
}
_two_digit () { expr "00$1" : '.*\(..\)'; }
_four_digit () { expr "$(_century $1)" : '.*\(....\)'; }
test_iso_YEARmoDY ()
{
local fun=$(expr "$(myname)" : 'test_\(.*\)');
echo $(myname) $fun
$fun
$fun 5
$fun 25
$fun 2 35
$fun 10 3
$fun 17 1 1
$fun 1400 1 1
$fun 1600 1 1
_century 22
_century 74
_century 1812
_century 325
_century 1444
}
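A few hand-worked spot checks, not captured output: _two_digit 5 gives 05; _century 74 gives 1974, _century 22 gives 2022, and _century 325 recurses through _two_digit to give 2025.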
The myname function is the recursion operator.
Yesterday's functions were everyword and everyword_write:
everyword ()
{
: my writing tablet. save every bit of typed text, non-{diary,wiki};
: date: 2019-04-26;
${*:-echo} $(home)/lib/$(myname).md
}
everyword_write ()
{
: prepend a Markdown 3rd level DATESTAMP to the file;
: EDIT and BACKUP the file;
: date: 2019-04-26;
printf "\n### %s\n\n" $(mydate) >> $(everyword);
everyword emacs;
everyword backup
}
$ everyword more # page thru the file
$ everyword_write # adds a new entry
It seems appropriate to write an emacs command to add the date_stamp. Which happened, and the need for everyword_write has disappeared.
Today, I installed cscope, thinking it will be useful to build tags tables for my OrgMode files.
Over the past few days, I've focused on my callgraph. The object has been to replace the callgraph with the comment-free function code. As concisely as possible, here is the callgraph of the top function graf_functions and its code-replacement tree.
* graf_functions
* fun_callgraf
* app_funuses
* fun_words
* wpl
* report_notpipe
* report_usage
* myname
* report_notargcount
* report_usage
* report_notargcount
* shd_trim
* myname
* report_notfunction
* report_usage
* spacepad4
* myname
* report_notargcount
and here's the result from running graf_functions on itself:
graf_functions ()
{
report_notargcount 1 $# function ... && return 1;
local file=./$1.sh;
fun_callgraf $* | tee ./$1.org | awk '
BEGIN { fmt = "shd_trim %s | spacepad4 %d\n" }
NF == 2 { printf fmt, $2, index($0,"*")-2; }
' > $file;
echo $file
}
| fun_callgraf ()
| {
| report_notargcount 1 $# && return 1;
| for fun in $*;
| do
| app_funuses $fun | awk "
|
| \$1 ~ /^$fun\$/ { next };
| { print \"$fun\", \$1 }
| ";
| done | callgraph
| }
| | app_funuses ()
| | {
| | set -- $1 $(fun_words $*);
| | for fun in ${*:2};
| | do
| | echo $(type -t $fun) $fun;
| | done | awk "
| |
| | \$1 !~ /^function$/ || \$2 ~ /^_/ { next; }
| |
| | \$2 !~ /^$1$/ { print \$2 }
| |
| | "
| | }
| | | fun_words ()
| | | {
| | | function _fun_active ()
| | | {
| | | awk '!( $1 ~ /^:;*$/) && !( $1 ~/^\${\*:-.*}/ )' $*
| | | };
| | | declare -f $* | _fun_active | wpl | sort -u
| | | }
| | | | wpl ()
| | | | {
| | | | report_notpipe && return 1;
| | | | tr -cs 'A-Za-z0-9_' '\n'
| | | | }
| | | | | report_notpipe ()
| | | | | {
| | | | | [[ -p /dev/stdin ]] && return 1;
| | | | | report_usage is NOT reading a pipe
| | | | | }
| | | | | | report_usage ()
| | | | | | {
| | | | | | echo USAGE $(myname 3): $* 1>&2
| | | | | | }
| | | | | | | myname ()
| | | | | | | {
| | | | | | | echo ${FUNCNAME[${1:-1}]}
| | | | | | | }
| | report_notargcount ()
| | {
| | [[ $2 -ge $1 ]] && return 1;
| | report_usage need at least $1 arg/s: ${*:3}
| | }
| | | report_usage ()
| | | {
| | | echo USAGE $(myname 3): $* 1>&2
| | | }
| report_notargcount ()
| {
| [[ $2 -ge $1 ]] && return 1;
| report_usage need at least $1 arg/s: ${*:3}
| }
| shd_trim ()
| {
| set ${*:-$(myname)};
| report_notfunction $1 && return 1;
| declare -f $* | awk ' $1 !~ /^:[;]*$/'
| }
| | myname ()
| | {
| | echo ${FUNCNAME[${1:-1}]}
| | }
| | report_notfunction ()
| | {
| | declare -f $1 > /dev/null && return 1;
| | report_usage $1 is NOT a function
| | }
| | | report_usage ()
| | | {
| | | echo USAGE $(myname 3): $* 1>&2
| | | }
| spacepad4 ()
| {
| report_notargcount 1 $# $(myname) N, multiple of 4 && return 1;
| local string="| ";
| case $1 in
| 0 | -*)
| cat -
| ;;
| *)
| (( ntab = $1 - 4 ));
| sed "s/^/$string/" | $(myname) $ntab
| ;;
| esac
| }
| | myname ()
| | {
| | echo ${FUNCNAME[${1:-1}]}
| | }
| | report_notargcount ()
| | {
| | [[ $2 -ge $1 ]] && return 1;
| | report_usage need at least $1 arg/s: ${*:3}
| | }
An obvious fix to the latter tree would be to replace a repeated function by its name, offset to point out its repeated code.
The tree above is smart enough to avoid descending down previously visited functions.
The heavy lifting here is done by the python program callgraph, which takes caller-called pairs and constructs the hierarchical graph. Recursion is handled the same as any subsequent call: once a function has had its sub-structure outlined, it's not descended again.
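As a rough idea of the approach, a shell-and-awk sketch (my reconstruction, not the python program itself) might read:
callgraph_sketch ()
{
    : SKETCH only -- the production callgraph is a python program;
    : read "caller called" pairs on stdin, print an indented outline,;
    : descending each function only the first time it is encountered;
    awk '
        { kids[$1] = kids[$1] " " $2; called[$2] = 1; all[$1] = 1; all[$2] = 1 }
        function walk(f, depth,    pad, n, child, i) {
            pad = ""; for (i = 0; i < depth; i++) pad = pad "    ";
            print pad "* " f;
            if (visited[f]++) return;
            n = split(kids[f], child, " ");
            for (i = 1; i <= n; i++) walk(child[i], depth + 1)
        }
        END { for (f in all) if (!(f in called)) walk(f, 0) }
    '
}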
Time to get serious about my tag work, first the OrgMode tags.
orgmode ()
{
: date: 2019-05-08;
${*:-echo} $(lib)/orgmode.txt
}
org_tags ()
{
: date: 2019-05-08;
grep '^\*.*:.*:$' ${*:-$(< $(orgmode))} 2> .grep.err | sed '
s/:[^:]*/ /
s/ .* //
s/:/ /g
' | onebyN 2> .one.err > $(lib)/$(myname).txt;
comment $(wc -l $(lib)/$(myname).txt)
}
untagged_files ()
{
: doc: regenerate the list of ORG files;
: find and save the tagged files;
: save the list of untagged ORG files,;
: leave output file name on stdout;
: date: 2019-05-08;
function _untagged ()
{
quietly suf_files org $HOME | usefulfiles | htcl | sort > $(orgmode);
: -- writes the tags file;
org_tags;
tagged_files | comm -13 - <(orgmode cat);
unset $(myname)
};
file=$(lib)/$(myname).txt;
: app_trace file $file, $*;
_untagged > $file;
${*:-echo} $file
}
tagged_files ()
{
: date: 2019-05-08;
field 1 < $(lib)/org_tags.txt | sort -u
}
orgtag_collection ()
{
: date: 2019-05-15;
${*:-echo} org{mode,_{tag,tags}} {un,}tagged_files $(myname)
}
The top of the heap, other than orgtag_collection, is untagged_files. It writes the orgmode files, ultimately writing the _untagged list. These are the candidates to work on, filling in the missing tags.
First, the pleasantries. Happy 49th Anniversary, Pat. God Bless these years we've shared, if not enjoyed.
And this is the 75th Anniversary of D-Day.
Today's challenge is to trim these functions from three to one. That's a question. Can it be done?
Since I've discovered the retiredlib concept, pick the most appropriate name to be the survivor and retire the other two. Functions going to the retiredlib, if ever used, are recorded in the retired log, and announced as obsolescent. Then, by the recorded context, their use may be upgraded to the surviving function.
Here they are in their current state. It certainly looks like they should be collapsed into one function.
fun_lastchange ()
{
: lists latest update to each function with date record stamp;
: date: 2018-12-27;
shd_getdate $* | sed 's/: date:* //; s/;.*//' | sort -rk2 | printfirst
}
shd_history ()
{
: list LAST_CHANGEDATE FUNCTION for each function in Environment;
: date: 2019-03-24;
set | shd_getdate | awk ' { sub(/;/, "", $4); printf "%s\t%s\n", $4, $1 }' | sort -r | awk '!p[$2]++ { print }'
}
shd_latest ()
{
: date: 2016-10-16;
: uses: shd_getdate awk;
: date: 2018-01-17;
: date: 2018-02-16;
shd_getdate $* | awk '
{ sub(/;/,"", $4); date[$1] = $4 };
END { for (d in date) print date[d], d; }
'
}
The outline of the solution suggests using the "Standard In or Args" concept, which itself seems burdened with a bit of redundancy.
stdinFileArgs ()
{
: date: 2018-08-15;
case $# in
0)
report_notpipe && return 1;
cat -
;;
*)
[[ -f $1 ]] && {
cat $*
} || echo $*
;;
esac
}
stdin_orargs ()
{
: treat STDIN as args, OR echo any args, default to CALLER;
: date: 2019-03-24;
[[ -p /dev/stdin ]] && cat || echo ${*:-$(myname 2)}
}
spaceit ()
{
: anagram of letters in set, pipe, cat;
: date 2016-10-27;
: date 2016-11-03;
: date 2016-11-12;
function ispipe ()
{
[[ -p /dev/stdin ]]
};
case $# in
0)
ispipe && cat || set
;;
*)
cat $*
;;
esac
}
stdin_collection ()
{
: mfg: collect_names;
: date: 2019-04-13;
${*:-echo} stdinFileArgs stdin_orargs spaceit $(myname)
}
Here, the _collection function has remembered the names of the functions with like behavior; in this case, suitable for re-engineering. (uggh!!) All this prompts these functions, with a quick demo:
collection_files ()
{
: date: 2019-06-06;
collection_info | egrep '[./]'
}
collection_info ()
{
: date: 2019-06-06;
function _c_label ()
{
$1 | tpl | sed "s/^/$1 /"
};
foreach _c_label $(collections) | sort -u
}
$ collection_files
binlib_collection /Users/martymcgowan/Dropbox/bin/calledlib
binlib_collection /Users/martymcgowan/Dropbox/bin/cmdlib
binlib_collection /Users/martymcgowan/Dropbox/bin/proflib
binlib_collection /Users/martymcgowan/Dropbox/bin/programlib
binlib_collection /Users/martymcgowan/Dropbox/bin/publiclib
binlib_collection /Users/martymcgowan/Dropbox/bin/retiredlib
binlib_collection /Users/martymcgowan/Dropbox/bin/shelflib
binlib_collection /Users/martymcgowan/Dropbox/pubs/mcgit/bin/applib
binlib_collection /Users/martymcgowan/Dropbox/pubs/mcgit/bin/commonlib
binlib_collection /Users/martymcgowan/Dropbox/pubs/mcgit/bin/fapilib
binlib_collection /Users/martymcgowan/Dropbox/pubs/mcgit/bin/ftplib
binlib_collection /Users/martymcgowan/Dropbox/pubs/mcgit/bin/refreshlib
binlib_collection /Users/martymcgowan/Dropbox/rdb/bin/rdlib
tagjob_collection org_tags.txt
tagjob_collection tag_add.txt
tagjob_collection tag_group.txt
tagjob_collection tag_hunt.txt
tagjob_collection tag_names.txt
tagjob_collection tag_next.txt
tagjob_collection tag_tally.txt
$ ...
Today was moving day. I've collected functions from the init_libs list into two libraries. Leaving the retiredlib undisturbed, the other functions were collected in the functionlib. Best summarized here:
$ diff .bak/.user_profile .bak/.bak
90c90
< ${*:-echo} {function,retired}lib
---
> ${*:-echo} {common,prof,shelf,public,app,program,cmd,rd,fapi,retired}lib
$ ...
Moreover, I've modified the f2file function so it's now possible to record changes to individual functions. The .functionlib directory in /Dropbox/bin/ is now emptied on each run. The current state of each function is recorded in a file named for the function. Since backup avoids backing up the unchanged files, only changes are recorded in the .bak/ tree. Occasional versioning records an orderly progression.
This little command shows the local backup history:
.functionlib.$ find .bak -type f | awk -F/ '{ print NF }' | sort -n | uniq -c
950 2
7 3
1 4
.functionlib.$
This shows 950 functions backed up at the first level. The 2, 3, 4 in the second column identify the latest, prior, and, in this case, earliest backups; the first column counts the changed functions in each. The depth is one more than the number of backups, since the awk field count includes the leading .bak/ directory.
A call from Bill Anderson the day before yesterday put me onto Apple's moving its next release's default shell to zsh. Which should prompt a move to make most functions portable there. But first, no rush to zsh.
f2file was an important function in the upgrade to functionlib.
f2file ()
{
: date: 2017-07-10;
: date: 2017-10-26;
report_notfile ${1:-MissingFirstArgument} && return 1;
set -- $1 .${2:-$(basename $1)} $(awk_file);
trace_call $*;
: gawk and awk differ in CLOSE built-in;
: gawk uses FCLOSE, awk CLOSE ?!;
if [ -d $2 ]; then
rm -f $2/*;
else
mkdir $2;
fi;
: read -p "f2file: $* ";
grep -v '_init 1>&2$' $1 | awk -v dir=$2 -f $3
}
The if - else - fi block is the update. Previously it was a rm -fr $2 to clean out the entire directory. Note the dot-named directory. The -fr flags recursively clean out an entire tree, which is where the backups are kept, and any versioning. The above code was plucked from the first backup.
Here's the backup tree from the first level down. The backup scheme makes a copy of each backed up file in the first level. The ones below represent the changed files.
.bak.$ find .bak -type f
.bak/f2file
.bak/cdx
.bak/usefulfiles
.bak/rd_init
.bak/backup_log
.bak/recorg_collection
.bak/app_funuses
.bak/graf_collection
.bak/backup_one
.bak/.bak/f2file
.bak/.bak/args_collect
.bak/.bak/rdb_join
.bak/app_update
.bak/lib_tosource
.bak/back_init
.bak/backup_here
.bak/back_up
.bak/qcmd
.bak/fun_words
.bak/rdb_init
.bak/collect_names
.bak/backup_varnames
.bak/args_collect
.bak/rdb_historydates
.bak/app_these
.bak/fun_callgraf
.bak/rdb_join
.bak.$
A thought on a zsh upgrade. It appears some of the syntactic goodies differ between bash and zsh, needing research at the moment. This suggests their explicit use be hidden in a function which returns the shell-appropriate syntax. I'm thinking of an args family.
read -p is the first feature that comes to mind. The portable way is already hidden in the app_{trace,pause} pair.
app_trace ()
{
: report the CALLER name 0, its args, their caller 1, and the callers caller 2.;
: date: 2019-05-04;
local call_0="";
local call_1="";
local call_2="";
local cdepth=${#FUNCNAME[@]};
[[ $cdepth -gt 1 ]] && call_0=${FUNCNAME[1]};
[[ $cdepth -gt 2 ]] && call_1=${FUNCNAME[2]};
[[ $cdepth -gt 3 ]] && call_2=${FUNCNAME[3]};
app_pause "$call_0( $* ) $call_1 $call_2"
}
app_pause ()
{
: pause execution, displaying the arguments;
read -p "$@" < /dev/tty
}
Today, besides a visit with Pat to "Charley Browns", I've worked up my wordcount library. The most important, most recent is _changes. Here:
wordcount_changes ()
{
: adds CHANGES, day to recorded day, added words, and
: .. MVAG, the n-day -- default 5 -- exponential moving average
: date: 2019-06-14;
: date: 2019-06-16;
function _wc_changes ()
{
addcol changes mvag | compute '
changes = ((last)?words - last:0);
last = words;
' | row 'i_date> 190609' 2> /dev/null | tee .bysort | column i_date changes mvag | compute "
fc = 1./$1.; fl = 1 - fc
mvag = lx * fl + changes * ((lx)? fc: 1.0);
mvag = sprintf(\"%6.1f\", mvag)
lx = mvag;
"
};
wordcount_byday > .byday
_wc_changes ${1:-5} < .byday
}
wordcount_byday ()
{
: date: 2019-06-10;
: date: 2019-06-16;
wordcount_words | rdb_iDate | column i_date words | rd sort -r | printfirst | rd sort
}
At the moment, I'm having trouble piping from wordcount_byday to _wc_changes.
In byday, rdb_iDate collapses insert_time to i_date, and the sort -r | printfirst idiom is effectively a "printlast" command; the final sort restores the order.
The _wc_changes function adds the changes and mvag columns, first computing the changes. Then it selects the dates since June 9th, 2019, when I got this working, collecting the insertion date (i_date), the number of changes (changes), and saving space for the moving average (mvag). The single argument to wordcount_changes, defaulting to 5, is the number of days to consider for the exponential moving average. Here's a sample formula:
wi = 1 / N; wa = 1 - wi
avg(1) = x(1)
avg(i+1) = wa * avg(i) + wi * x(i+1)
Where N is the number of terms to consider. Here's the sample output for today:
$ wordcount_changes
i_date changes mvag
------ ------- ----
190610 312 312.0
190611 1974 644.4
190612 662 647.9
190614 557 629.7
190615 555 614.8
190616 476 587.0
$ ...
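As a hand check of the second row with N = 5: mvag = 0.8 * 312 + 0.2 * 1974 = 249.6 + 394.8 = 644.4, matching the table.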
Or with a longer time constant:
$ wordcount_changes 7
i_date changes mvag
------ ------- ----
190610 312 312.0
190611 1974 549.4
190612 662 565.5
190614 557 564.3
190615 555 563.0
190616 476 550.6
$ ...
Today, a day of investment practice, among other things, I added a summary of the Motley Fool Recommendations. This after reconciling my all-precious wordcount functions to account for deleting a bunch of files. First the Motley Fool. The functions are in the foolish family; the prep function is looking for a file, foolSA.html. I collect the Recommendations from the Stock Adviser page: just sweep over the page with the cursor, collecting all the text, and paste into foolSA.html. Any file would do; that just happens to be the name in the code.
The columns are named for the fields on that page. The only problem was naming the Return Percentage. The simple "return" didn't work: awk, the language of my instance of rdb, disallows the language built-ins as variable or field names.
It turns out the lines in the pasted text with eleven tab-separated entries uniquely distinguish the data fields according to the column names. The run function then calls the rpt function, which returns the uploadable table I use in my online filters. The useful fields: the symbol, date, risk, status, rtnPct, and the name. The Fools were clever enough to encode their status in the name; the default value I call OPEN. It turns out that any Now stock is also a Starter, hence the distinguishing selection. There is a plethora of closed stocks. Taking advantage of this list is an implementation must at this point.
A clever use of the app preparation is to echo the entry_point function name. And now for the technical detail: how to include the code for the app in this document.
Here goes
foolish_columns ()
{
app_trace $*
${*:-echo} date name symbol cap whose risk arp rtnPct sNp vsSnP fav
}
foolish_rpt ()
{
report_notpipe && return 1
column symbol date risk status rtnPct name | compute '
status = ((name ~ /Closed/)? "closed": \
((name ~ /Now/)? "Now": \
(name ~ /Starter/)? "Starter": "OPEN" \
));
sub(/ \(Closed.*\)/,"",name);
sub(/[%]/,"",rtnPct);
sub(/[+]/,"",rtnPct);
rtnPct = sprintf("%6.1f", rtnPct);
'
}
foolish_prep ()
{
[[ -f $1 ]] || {
app_trace $*
local recfile="https://www.fool.com/premium/stock-advisor/recommendations/all/"
printf "copy/paste page @ %s to local %s\n" $recfile $1
return
}
app_trace $*
foolish_columns rdb_hdr
awk -F'\t' 'NF == 11' $1 | tee foolish.tab
}
foolish_run ()
{
set foolSA.html foolish.rdb foolish.tsv
app_trace $*
newest $2 $1 || foolish_prep $1 > $2
shift
app_trace newest $2 $1 $(which foolish_app)
newest $2 $1 $(which foolish_app) || {
app_trace $*
cat $1 | foolish_rpt | tee $2 | justify
}
llrt *fool*
}
echo foolish_run
Voila!! It worked. I'll likely need a better convention than using the app as a suffix. But the INCLUDE file is nothing more than the
#+BEGIN_SRC sh
... the app source
#+END_SRC
BEGIN, END pair wrapping the code in the ../bin directory. Itself a simple function. Here's the actual INCLUDE:
#+INCLUDE: ../lib/foolish.app
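That wrapping function isn't shown above; a minimal sketch (the name app_include is hypothetical, and foolish_app is assumed to be the app file on the PATH as in foolish_run) could be:
app_include ()
{
    : SKETCH -- wrap an app source file in an OrgMode SRC block;
    : the name app_include is hypothetical, not the library function;
    echo "#+BEGIN_SRC sh";
    cat ${1:-$(which foolish_app)};
    echo "#+END_SRC"
}
with its output redirected to ../lib/foolish.app.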
In my current practice, I've now learned to use a feature a bunch of times before generalizing.
Today's test is for the awk command-line arguments. To add library functionality to the /RDB commands, notably row and compute, the -f argument may have more than one file. The remaining question is: may the row- or compute-specific code be in the script in addition to file arguments?
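For the record, POSIX awk accepts several -f options on one command line; mixing inline program text with -f files is a gawk extension (-e, or --source). A sketch with hypothetical file names:
$ awk -f rdlib.awk -f row.awk table.rdb
$ gawk -f rdlib.awk -e 'NR > 2 { print $1 }' table.rdb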
This morning I ran off a handful of birthday-generating functions. Since I captured most of the work in a generating function, I'm comfortable to include the display and not feel like I'm cheating in adding these to my wordcount tally.
$ birth_collection declare -f
birth_make ()
{
eval "birthdays_${1} () { echo {19{$2..99},20{0,1}{0..9}}$3; }"
}
birth_weekdays ()
{
foreach echo $($1) | gregorian_pc
}
birth_family_martybetty ()
{
birth_make martyjr 20 1028;
birth_make betty 20 0915;
birth_make dan 45 0802;
birth_make marty3 44 0813;
birth_make sean 52 0819;
birth_make meg 46 0801;
birth_make vince 50 0726;
birth_make kev 51 0617;
birth_make maureen 56 0602;
birth_make brendan 55 0602;
birth_make mike 61 0412
}
$ ...
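For instance, reconstructing (not captured output) what birth_make manufactures for marty3:
$ declare -f birthdays_marty3
birthdays_marty3 ()
{
    echo {19{44..99},20{0,1}{0..9}}0813
}
The brace expansion only happens when the function runs, so the generated body stays compact.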
What's needed is a fourth argument: first post-death birthday, to trim the list to cover one's life. The list begins with each person's zeroth (date of) birth.
$ birth_weekdays birthdays_marty3 | field 1 | sort | uniq -c
10 Fri,
12 Mon,
11 Sat,
11 Sun,
10 Thu,
11 Tue,
11 Wed,
The sum – 76 – therefore says I'm 75 this year.
These snippets are made possible by this provenance capture device:
$ qcmd nprov nprov_eg
nprov ()
{
: date: 2019-07-01;
echo "$ $1";
eval "$1"
}
nprov_eg ()
{
: date: 2019-07-01;
nprov 'birth_weekdays birthdays_marty3 | field 1 | sort | uniq -c' | tee ~/.x
}
Of course, the _eg function is a good thing to have around, where the function's use may be in a command line instance with other tools.
In which I accomplished a great deal with the column and its immediate dependent functions. The feature has lingered in limbo for most of the three decades I've used /RDB. Today I finally slayed the dragon:
app_trace
addcol
wordcount
apps
As a result, the function is now more understandable. First, the relevant functions with their line count, which includes comments; then the column function.
$ wc -l .functionlib/column* | grep -v unique; declare -f column
18 .functionlib/column
36 .functionlib/column_collect
5 .functionlib/column_data
13 .functionlib/column_does
11 .functionlib/column_indices
88 total
column ()
{
: date: 2019-07-02;
read HEAD;
[[ -z "$HEAD" ]] && {
( echo;
cat ) | listtotable | $0 "$@";
exit 0
};
: DASH is useless after this point, see use of rdb_hdr.;
read DASH;
: app_trace "HEAD: $HEAD, args: $*";
column_indices;
set -- $(column_collect ${*:-$HEAD});
HEAD="${*:2}";
: app_trace $1 HEAD: $HEAD;
[[ -n "$1" ]] && column_does $1
}
To the casual reader, an rdb file has two header lines: the first is a tab-separated row of field names; the second, a row of dashes under each name. These are followed by rows of tab-separated data, the relations in the relational database.
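A minimal hand-made example, borrowing the table/lines counts shown later in list format:
table	lines
-----	-----
bintags.rdb	181
books.rdb	100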
Here is an /rdb-generated list of tables in a certain directory, listing each table and, for each, its fields.
$ tables *.rdb | justify
table fields
----- ------
TagLibrary.rdb tag library
bintags.rdb name tag value
books.rdb rank share total handle
dnldReports.rdb report file_RE fields
empty.rdb column
foolish.rdb date name symbol cap whose risk arp rtnPct sNp vsSnP fav
h.wchdr.rdb insert_time delete_time lines words chars file
h.wordsnxt.rdb insert_time words file
h.wordsold.rdb insert_time words file
libdndlReports.rdb report file_RE fields
om.rdb object method
references.rdb name url comment lead trail
stat.rdb
tables.rdb table fields
test.rdb
todo.rdb thru proj todo
wchdr.rdb insert_time delete_time lines words chars file
wherefun.rdb fname library others
whoKnows.rdb FN REV EMAIL
wordcount.rdb lines words chars file
Where justify is the /rdb command to pad each column to vertical alignment.
Which brings up the need to fix the row and jointable commands. Note, for later discussion, the listtotable command in column. The command accommodates /rdb tables in the list format, where the first line is empty.
$ tablelines | tabletolist 2>/dev/null | sed 12q
table TagLibrary.rdb
lines 21
table bintags.rdb
lines 181
table books.rdb
lines 100
table dnldReports.rdb
lines 7
I've sufficiently padded today's wordcount.
Without much further comment:
$ nprov_eg
$ declare -f nprov{,_eg}
nprov ()
{
: date: 2019-07-01;
echo "$ $1";
eval "$1"
}
nprov_eg ()
{
nprov 'declare -f nprov{,_eg}' | tee ~/.x
}
Today, it's wfc
– Word Frequency Count
wfc ()
{
: date: 2019-07-12;
report_notfile ${1:-/dev/null} && return 1;
tpl $1 | lc | sort | uniq -c | awk '$1 > 1 && length($2) > 3'
}
tpl ()
{
: returns STDIN or NAMEd files as a Token Per Line;
cat ${*:--} | tr -s ' \t' '\n'
}
lc ()
{
: transliterate STDIN or command ARGS to lower case;
: date: 2019-03-24;
stdin_orargs $* | tr A-Z a-z
}
stdin_orargs ()
{
: treat STDIN as args, OR echo any args, default to CALLER;
: date: 2019-03-24;
[[ -p /dev/stdin ]] && cat || echo ${*:-$(myname 2)}
}
Which brings along tpl, lc, and a real gem, stdin_orargs. Reading wfc, we see awk ignores un-repeated words and those with 3 or fewer characters (mostly articles, prepositions, and conjunctions).
The latest fix in my /RDB package was to column and its sub-functions _{does,collect}. The problem arose in my treatment of the Columns variable. In the process of factoring the command into functions, I told myself "wait until ignoring it raises its ugly head", or some such cautionary thought. It finally did when tabulating entries by date, particularly in the wordcount exercises. In a pseudo-code outline, I'd, for the words field, simply use the last record on the given date. The problem then arose since the addcol function appends its added fields to the end of the record, and was discovered in having ignored the new fields in the column function.
column ()
{
: date: 2019-07-02;
: date: 2019-07-14;
read HEAD;
[[ -z "$HEAD" ]] && {
( echo;
cat ) | listtotable | $0 "$@";
exit 0
};
: DASH is useless after this point, see use of rdb_hdr.;
read DASH;
: app_trace "HEAD: $HEAD, args: $*";
column_indices;
set -- $(column_collect ${*:-$HEAD});
HEAD="${*:3}";
: app_trace $1, new: $2, HEAD: $HEAD;
[[ -n "$1" ]] && column_does $1 $2
}
column_does ()
{
: date: 2019-07-02;
: app_trace HEAD: $HEAD, COL: $1, new: $2;
: date: 2019-07-14;
local Columns="";
local nc=$2;
while [[ $nc -gt 0 ]]; do
Columns="${Columns} ";
(( nc -= 1 ));
done;
: app_trace "Columns: <$Columns>";
( rdb_hdr $HEAD;
tee .does.in | sed "s/\$/$Columns/" ) > .awk.in;
cat .awk.in | awk '
BEGIN { FS=OFS="\t"}'"
{ print $1 }
"
}
column_collect ()
{
: collects the columns;
: e.g "$1,$7,$4, ...";
: and grows the HEAD.;
: where HEAD is the list of input column names;
: returns COLS heada headb ...;
: date: 2019-07-02;
: date: 2019-07-14;
local COL=;
local newcol=0;
: app_trace $*;
for arg in "$@";
do
C=1;
for i in $HEAD;
do
: app_trace arg: $arg,i: $i;
[[ "$i" = "$arg" ]] && {
shift;
break
};
eval C=\$nC${C};
done;
COL="${COL:+$COL,}\$$C";
: app_trace COL: $COL;
[[ "$i" != "$arg" ]] && {
HEAD="${HEAD:+$HEAD }${arg}";
(( newcol += 1 ));
: app_trace HEAD: $HEAD;
: todo: count the number of new, empty columns;
: and find a way to pass to column_does;
: Columns="${Columns} "
};
done;
: app_trace COL: $COL, HEAD: $HEAD;
: app_trace Columns: {$Columns};
printf "%s\t%s\n" "$COL" $newcol "$HEAD"
}
In column_collect, the correction was to insert the newcol local variable to contain the count of newly inserted column names. The commented out statement:
Columns="${Columns} "
was where I'd chosen to ignore the fields. As the code was now factored into functions, I didn't like the idea that the variable was a character string. Passing a string through function calling sequences I've always regarded as risky, requiring too much attention to detail. A single number 0, 1, 2, … seemed much more reliable. The Columns string is consumed in column_does, where the necessary Tabs are appended to the records with this line of code, using sed to append:
tee .does.in | sed "s/\$/$Columns/" ) > .awk.in;
Passing the number newcol through the function interface meant inserting it as the second returned string variable from column_collect:
printf "%s\t%s\n" "$COL" $newcol "$HEAD"
It turns out it could have been first, but note the COL string is the mapping of names to awk field positions, so central to the /RDB model:
$5,$2, ...
where these have been arranged in the main loop in column_collect by this statement:
COL="${COL:+$COL,}\$$C";
Noting that the field names are sequenced by the HEAD variable:
HEAD="${HEAD:+$HEAD }${arg}";
The interface between column_collect and column_does still relies on HEAD being an environment variable. This code is still an opportunity for improvement. For the time being, I'm enjoying its much more effective behavior.
This use of column also supports an in-place use of the addcol feature. The down-side to that is it inherits an added maintenance cost when having to express the current list of fields coming down the pipe.
I've just resurrected the oldbook contents. Browsing my wordcount_data, I realized it disappeared on June 21st. The delete_time on records in pubs/marty3/commonplace/oldbook all point to a moment on that day. And for the time being, I'll leave oldbook out of sight of wordcount; how about pubs/marty3/commonplace/.oldbook?
I'll have to read, edit, curate the thoughts there. That, and find a way to annotate the work. How about an /RDB table?!
These few words are part of an experiment to change the wordcount model. I'm thinking of using daily differences in stat usage to note the files changing on any day, and calculating the diffs in each file's wc, rather than relying on the 'words' tally; it's too gross a statistic.
Wrapping up the Mens Club contacts work. In the last few days, after reading Manis' Unix Relational Database Management, I'm persuaded to lean on it a little harder – learn to depend on your public utilities – Sam Dressler's dad.
So, yesterday and today, a couple of functions adding insight to the csv to rdb process:
NAME member_init ./locallib
CODE
member_init ()
{
unset contacts;
setget contacts $(home)/MensClub/membership/contacts;
pushd $(contacts);
mkContacts SB_All_Men _6-25-2019;
mkContacts iContact -import;
mkContacts membership -190715;
app_trace CONTACTS: $CONTACTS
}
NAME mkContacts ./locallib
CODE
mkContacts ()
{
eval "$1$2 () { \${*:-echo} $(contacts)/$1$2.csv; }";
eval "$1 () { \${*:-echo} $(contacts)/$1$2.rdb; }"
}
USED_BY
member_init
member_init
member_init
EXAMPLES
member_init mkContacts SB_All_Men _6-25-2019;
member_init mkContacts iContact -import;
member_init mkContacts membership -190715;
NAME canonemail ./locallib
CODE
canonemail ()
{
: app_trace $*;
set -- $(basename $1);
: app_trace $*;
set -- ${1%.*} $(myname 2).rdb;
: app_trace $*;
$1 emakey_rdb | tee .emakey.out | row 'email ~ /@/' | tee .email.out | compute 'email = tolower(email)' | rd sort > $2
}
USED_BY
icontactemail
loweremail
memberemail
EXAMPLES
icontactemail iContact-import canonemail;
loweremail SB_All_Men_6-25-2019 canonemail
memberemail membership-190715 canonemail;
NAME emakey_rdb ./locallib
CODE
emakey_rdb ()
{
: date: 2019-07-20;
: app_trace $*;
csva_rdb $1 | column $(args_uniq email $(csv_fields $1))
}
USED_BY
canonemail
EXAMPLES
canonemail $1 emakey_rdb | tee .emakey.out | row 'email ~ /@/' | tee .email.out | compute 'email = tolower(email)' | rd sort > $2
The centerpiece of the plan is canonemail and emakey_rdb. The latter accepts a smart file and, using csva_rdb (there could be csvb, …, depending on the variety of formats), puts the email field first in the list of the table's fields. The csv_fields function plucks the field names from the input CSV file. Its last fix was the trickiest:
csv_fields ()
{
: app_trace $#;
set -- $(sed 1q $1| fmdos | sed 's/ //g; s/[^\[]*\[//; s/\[//g; s/\]//g' | tr A-Z, a-z\\t);
: app_trace RETURN $*;
echo $*
}
The trickiest part, which still seems portable, is this bit:
s/[^\[]*\[//; ...
which skips any non-left-square-bracket characters prior to the first one; in the iContacts download this is a pernicious 3-byte sequence whose meaning I chose not to explore. Just throw away the non-alphanumeric characters in the first line of the CSV file.
The canonemail function selects just those records with legitimate email addresses, folds them to lower case, and sorts the output into the /rdb file defined by the calling function.
I've used a repeated function for some time, mostly for examining deep backup trees, and trimming leaves and branches. Today, I thought to clean up the interface. The problem is that
repeated .bak/ N # say 3, returns
.bak/.bak/.bak/
where I'd like it to return .bak/.bak/.bak, so that when used in an expression, it makes more sense to throw in the trailing slash there. So, the new function is nbak, used thusly:
$ declare -f nbak; ls $(nbak 7)/f*lib
nbak ()
{
: date: 2019-07-22;
backup_varnames;
local bak=$(dir_backup);
echo $bak$(repeated /$bak $(expr ${1:-3} - 1))
}
.bak/.bak/.bak/.bak/.bak/.bak/.bak/fapilib
.bak/.bak/.bak/.bak/.bak/.bak/.bak/fixlib
.bak/.bak/.bak/.bak/.bak/.bak/.bak/ftplib
.bak/.bak/.bak/.bak/.bak/.bak/.bak/functionlib
This was necessary to repeat the directory-separating slash one less time than the directory names.
Also, the default backup directory name now varies. In the past, the hidden .bak/ was sufficient. Now working with the Mens Club, using Dropbox on Windows, a hidden name is difficult, if not impossible, so in that directory tree, the dir_backup is back/. (Should it be a more natural backup/?)
The convenient default is 3.
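For reference, repeated itself amounts to something like this sketch (not necessarily the library version):
repeated ()
{
    : SKETCH -- echo the first argument repeated N times, e.g. "repeated ../ 3" gives ../../../;
    local out="" n=${2:-1};
    while [[ $n -gt 0 ]]; do
        out="$out$1";
        (( n -= 1 ));
    done;
    echo $out
}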
Today's inspiration came while lifting a private file into the public view. The challenge was to simplify the collection by always using a local orgheader.org file. It's enough work to locate the source being imported. In this case, it's a piece of work I did to show off Rob Hoffman's November 1943 National Geographic photo captions. The source is housed in my "every/word" collection. Here's the copy of the re-hosted source:
#+TITLE: National Geographic Photo Captions, November 1943
#+AUTHOR: Marty McGowan
#+EMAIL: martymcgowan@alum.mit.edu
#+OPTIONS: ^:nil
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="./style.css" />
#+INCLUDE: ../../../marty3/lib/NatGeo_LXXXIV-5,November43.org
** COMMENT hidden
commonStyle, headStyle
how to publish a file not on the publish tree.
The included NatGeo_… file lives in a directory just out of sight of my parallel local and web host trees. The files there are not meant for general public display. Any one of them may be made available by the technique here. The common challenge is to make a local org header which uses the common style sheet.
So, here's the resulting orgheader.org file and the orgheader function which produced it:
$ cat orgheader.org; declare -f orgheader
#+AUTHOR: Marty McGowan
#+EMAIL: martymcgowan@alum.mit.edu
#+OPTIONS: ^:nil
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="../../../lib/style.css" />
orgheader ()
{
: condition a generic orgheader with local style sheet;
: to point to an N-level specific orgheader with N-level Style sheet;
: date: 2019-07-29;
local d=$(echo ${PWD#*/Dropbox} | awk -F/ '{ print NF-1 }');
local l=$(home)/lib;
local o=$l/orgheader.org;
local n=${o%.org}_${d}.org;
local r=$(repeated ../ $d)lib;
: app_trace "d: $d, l: $l, o: $o, n: $n, r: $r";
[[ -f $n ]] || {
sed "s@./style.css@${r}/style.css@" $o > $n
};
ln -f $n orgheader.org;
${*:-echo} orgheader.org
}
The sed script relies on an existing $(home)/lib/orgheader.org file with a stylesheet invoked thus:
href="./style.css" ...
Note, the script is only executed for non-existing levels.
This one has been perplexing me for a long time: How does one distinguish a re-directed input file:
command < file
from a stream on a pipe, such as an identical result from:
cat file | command
To unpack this I also needed some tooling on my provenance command. Here is the sequence with the four test cases. The resulting function is pipeFileOrNot:
$ show_nprov
$ wc *.rdb; declare -f pipeFileOrNot show_nprov
139 281 2837 command.rdb
pipeFileOrNot ()
{
[[ -p /dev/stdin ]] && echo it was a PIPE || {
for f in $*;
do
[[ -f $f ]] && return;
done;
[[ ! -t /dev/stdin ]] && echo it must have Been a FILE
}
}
show_nprov ()
{
nprov "wc *.rdb; declare -f pipeFileOrNot show_nprov";
echo =====;
nprov 'pipeFileOrNot *.rdb';
nprov 'pipeFileOrNot < *.rdb';
nprov 'cat *.rdb | pipeFileOrNot';
nprov 'pipeFileOrNot foo bar zot < *.rdb'
}
=====
$ pipeFileOrNot *.rdb
$ pipeFileOrNot < *.rdb
it must have Been a FILE
$ cat *.rdb | pipeFileOrNot
it was a PIPE
$ pipeFileOrNot foo bar zot < *.rdb
it must have Been a FILE
Taking input from a named file produces no result; a re-direction declares it must have been a file; the simplest to detect is when stdin is on a pipe. The tricky one is re-direction in the face of other command arguments.
Having worked out the four test cases, it was first a simple matter to collect them on a command line, prepend the evidence of a file, show the two functions, and throw in the delimiter.
The last test case is a bit of a kludge. The function assumes if any of the command arguments is a file, then there must be input re-direction, an equally flawed assumption.
I haven't been able to find a test for re-directed input.
The assumption for re-directed input is
if no command argument is a file AND
if the standard input is NOT a Terminal, THEN
the input must have been re-directed from a FILE
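For what it's worth, bash's -t test expects a file-descriptor number rather than a path; a sketch along the same lines (the name stdin_kind is hypothetical, not part of the library) might read:
stdin_kind ()
{
    : SKETCH, hypothetical -- classify the standard input;
    if [[ -p /dev/stdin ]]; then
        echo it was a PIPE;
    elif [[ -t 0 ]]; then
        echo it is the TERMINAL;
    else
        echo it must have Been a FILE;
    fi
}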
A simple function today, rdb_fieldcheck, to clean up the justify problem. The justify problem is when a command | justify breaks because the command output has an inconsistent field count. The simple fix is to pipe the result through column:
command | column | justify
Today's fix realized the problem was upstream: a table had an inconsistent number of fields. There's the fix: rdb_fieldcheck.
rdb_fieldcheck () { awk -F'\t' '!p[NF]++ { print NF, $0 }' $*; }
Since the HEAD record contains the names of the fields, the first line prints the number of fields followed by the field names. Any subsequent records which have a different number of fields, i.e. incorrect, produce a line: the number of (now) different fields, followed by the record.
Simply use the information to correct those records in the upstream table. And remember to keep checking, since this algorithm only prints the first offending record.
A lesson learned: the result is so clean, I was tempted to turn it into a table. Why would one? Other than a too-tidy desire; putting the result to work on cleaning up the table was simple enough that I realized there's little need to collect the data. And the offending records are captured with my existing history mechanism, rd_syncf.
These functions collect and arrange bank and credit/debit card accounts in a canonical form. As they stand, they collect all the available data. I'm working toward a "make, newest" approach: CSV files in the data directory bearing an account number, newer than the history file, e.g. history_8808.rdb, will be collected, arranged, and appended to their respective history file.
The history files are concatenated, respecting their common header, into a single accounts.rdb file.
Work is also in progress to produce a join on the checks table.
Starting with csv0_rdb, if collect_acct shows no newer account files, then return, otherwise:
In csvb_rdb, the clean_acct function tidies up the field names and their contents. Of particular interest is the account 8808, which retrieves the CSV fields named rundate, account, action, and amount.
The common field names are largely the same, but differ slightly. The separate fields_adjust_ functions both assign the name changes, e.g. rundate becomes simply "date", and decompose fields with multiple fields included under one name. E.g. in 8808 the "action" field breaks into the "with" field, which might have been called "counterparty", and splits out an included check number into the "check" field. It also detects a transaction "type" according to the list of things to "look" for.
A piece of magic is hidden in the clean_acct function:
args_uniq $(fields_common) $(fields_${1})
where args_uniq does just that, returning the first appearance of an argument name in its list. In this way, the fields_common are the first fields in any output list, and if any csv has a common field name, it's placed in its position simply by the order of the names. Notice that clean_acct calls a table-specific function for the re-assignment.
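Since args_uniq carries the ordering magic, a minimal sketch of such a helper (my reconstruction, not the library's implementation) might read:
args_uniq ()
{
    : SKETCH -- return each argument once, in order of first appearance;
    local a out="";
    for a in $*;
    do
        case " $out " in
            *" $a "*)
                ;;
            *)
                out="$out $a"
                ;;
        esac;
    done;
    echo $out
}
Fed the common names followed by fields_8808, it returns the common fields first and each name only once.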
The final clean-up piece, date2yymmdd, defensively converts both mm/dd/year and m/d/year formats into yymmdd.
lfin_init ()
{
: create an empty absent checks table;
[[ -f checks.rdb ]] || {
rdb_hdr $(fields_checks) > checks.rdb
};
: Main Loop ...;
acct_names foreach csv0_rdb;
table_names rd_cat > accounts.rdb;
tables | tee tables.rdb | justify
}
fields_adjust_8808 ()
{
compute '
account = "8808";
with = action;
date = rundate
if (index(with,"Check Paid #")) {
check = substr(with,length("Check Paid #") + 2)
} else {
look = "DEBIT CARD PURCHASE:BILL PAYMENT:CASH ADVANCE"
look = look ":DIRECT DEBIT:DIRECT DEPOSIT:TRANSFERRED"
look = look ":ADJUST FEE"
for (n = split(look,seek,":"); n; n--) {
if (index(with,seek[n])) {
type = seek[n]
with = substr(with,length(type)+2)
}
}
}
'
}
fields_8808 ()
{
${*:-echo} rundate account action amount
}
fields_common ()
{
${*:-echo} date account type amount check with
}
collect_acct ()
{
set ${1:-8808};
ls -t *$1* data/*$1* | awk "/history_$1.rdb/ { exit }; { print; } "
}
clean_acct ()
{
: date: 2019-08-07;
app_trace $*;
column $(args_uniq $(fields_common) $(fields_${1})) |
sed 's/&/\&/' | fields_adjust_${1} |
row 'date !~ /download/' | compute 'amount = sprintf("%8.2f",amount) ' |
date2yymmdd
}
csv0_rdb ()
{
: date: 2019-08-07;
local stem=$1;
set -- $(collect_acct $1);
app_trace "#: $#, args: $*";
[[ $# -gt 0 ]] || return;
awk NF $* > .accum_history_$stem.csv;
set -- history_$stem.{old,rdb,nxt};
: collect changes, new data in NXT;
csvb_rdb $stem > $3;
: mv RDB to OLD;
mv $2 $1;
: RD cat OLD NXT into RDB;
rd_cat $1 $3 | rd sort -u > $2
}
csva_rdb ()
{
: date: 2019-07-20;
rdb_hdr $(csv_fields $1);
tail +2 $1 | fmdos | cancsv | sed 's/ *, */ /g'
}
csvb_rdb ()
{
: date: 2019-08-07;
set -- ${1:-8808};
csva_rdb .accum_history_$1.csv | rd sort -u | rd grep 2019 | clean_acct $1 | fields_common column
}
date2yymmdd ()
{
: fold DATE in M/D/YYYY into YYMMDD;
: by concatenating the 3 pieces of the split field;
compute '
n = split(date,ymd,"/")
if (n == 3) {
date = ymd[3]%100 sprintf("%02d",ymd[1]) sprintf("%02d",ymd[2])
}
'
}
For the record, here is the callgraph for the lfinlib function library:
Yesterday was "moving day" for the /RDB history files. The history of an *.rdb file has been kept in a companion file with a h. prefix. For example the table tables.rdb
would have it history in stored in h.tables.rdb
. What is the history?
The history of a table adds two fields to each record, it's insert and delete times. Realizing the current records in the table haven't been deleted, their record in the history file contains a time value in the insert_time field, but none in the delete_time field. Except during periods of editing, the table may be reconstructed from the history table, thusly:
row !delete_time < h.tables.rdb | ncolumn {insert,delete}_time > tables.rdb
The RDB ncolumn command removes the named columns from the table.
Since the history will have many more records than the table itself, it seemed prudent to save a compressed version. One wonders if the time it takes the user, certainly not the computer, is worth the space and/or mental savings?
In any case, the history may be compressed, and uncompressed at will with companion commands
compress table.rdb
uncompress table.rdb.Z
Where .Z is the suffix for a compressed file. In the case of a history file for a table, the compressed history has been in h.tables.rdb.Z. And an important note: compressing and uncompressing a file is a toggle. After compressing a file, the uncompressed file is removed; one or the other exists. In order to read and process the compressed data, a command exists to retrieve it without having to uncompress the file. These two commands produce the same data:
$ cat h.table.rdb
$ zcat h.table.rdb.Z
This little function handles either case:
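A sketch of what it might look like (the name zhcat is hypothetical):
zhcat ()
{
    : SKETCH, hypothetical name -- read a table whether or not it is compressed;
    [[ -f $1.Z ]] && zcat $1.Z || cat $1
}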
Either way, it works regardless of the state of the file, compressed or not.
The motivation to move the history and its compressed version arose while managing contact data for the community Mens Club. The team makes regular use of XLS spreadsheets for our roster, keyed on the men's email address. We have records on roughly 700 men in the community, of which typically half are members. Since I'm comfortable using /RDB, and find it more productive than Excel, I'll work with *.rdb files. The conversion is simple: export a worksheet as a *.csv file. I've a working csv to rdb conversion tool:
csva_rdb ()
{
: date: 2019-07-20;
: app_trace $*;
rdb_hdr $(csv_fields $1);
tail +2 $1 | fmdos | sed 's/ *, */ /g'
}
emakey_rdb ()
{
: date: 2019-07-20;
app_trace $*;
csva_rdb $1 | column $(args_uniq email $(csv_fields $1))
}
csv_fields ()
{
set -- $(sed 1q $1| fmdos | sed '
s/ //g
s/[^\[]*\[//
s/\[//g
s/\]//g
' | tr A-Z, a-z\\t
);
: app_trace RETURN $*;
echo $*
}
...
$ emakey_rdb file.csv > file.rdb
Where csva_rdb, called by emakey_rdb, produces the column headers (rdb_hdr) and converts the comma-separated fields into tab-separated ones for rdb. The csv_fields function appears the most complicated: the stream-edit commands accommodate a few different conventions I've encountered in various csv header lines. There's also a just-higher-level command to accommodate non-ascii characters in a csv file. I'll have to take that up with the email service provider.
Another reason for using /RDB in preference to Excel. The service provider can deliver a table of "all men", regardless of their status. We have a separate field to mark the members, but the other relevant status situations are "Do Not Contact" and "bouncing". The service provider does not put the status in the global download. It is only possible to download each status separately. Separate CSV to RDB files are at least as simple as separate worksheets on a spreadsheet. Here, the RDB advantage is manifest, since it, a database, has a jointable feature, modeled on a spreadsheet by cut and paste, and eliminating records with a redundant key.
After some consideration, I decided to keep the history in a separate directory, rather than distinguish it by a prefix. What this does is take what may be intrusive files, from the non-RDB user's perspective, and put them just out of sight. Also, since a backup utility does the same thing, the only "strange" files to the non-RDB user are the current version of "All Men", the two separate status tables, and the likely join into the master.
So, what directory?
The solution, worked out yesterday and today, is to put the history in a .hry directory. The h.table.rdb moved to .hry/table.rdb.
h.table.rdb => .hry/table.rdb
Similarly for the compressed file, to .hry/table.rdb.Z. At a moment's reflection, it seems contrary to file-naming conventions: a like file name in any sub-directory should be of the same content format as the one in the most-parent directory. When the compressed file is uncompressed, it stays in the .hry directory, unless the user moves it. Since most of the table handling, outside of simple editing, is handled by tools, it seems little matter whether the file is identified by a prefix or a separate directory.
Since the idea for my history mechanism arose in the late '80s, after fresh experience with the Source Code Control System (SCCS), which used the name prefixes s. and p. to note the state of the file, the simple h. seemed appropriate.
To move the files, I used a few scripts. The challenge was not the files themselves, but their backups, because it's now a directory chain. The history and compressed history had to move in the backup chain. For example
.bak/.bak/.bak/h.tables => .hry/.bak/.bak/.bak/tables
Here's a function for that job, with sample output:
tohrya ()
{
find . -name 'h.*' -type f | grep .bak/ | grep -v '\/.tmp\/' |
tee .backs.txt | sed '
s/\.bak/ &/
s/\/h\./\/ /
' | awk '{
fmt = "ln -f %s $(needir %s.hry/%s)%s\n"
printf fmt, $1 $2 "h." $3, $1, $2, $3
}' | tee hry.sh;
comment source hry.sh
}
ln -f ./bin/.bak/h.functionlib $(needir ./bin/.hry/.bak/)functionlib
ln -f ./bin/.bak/h.tables.rdb.Z $(needir ./bin/.hry/.bak/)tables.rdb.Z
ln -f ./bin/.bak/.bak/h.functionlib $(needir ./bin/.hry/.bak/.bak/)functionlib
ln -f ./bin/.bak/.bak/h.tables.rdb $(needir ./bin/.hry/.bak/.bak/)tables.rdb
ln -f ./bin/.bak/.bak/h.foo.rdb.Z $(needir ./bin/.hry/.bak/.bak/)foo.rdb.Z
ln -f ./bin/.bak/h.functionlib.Z $(needir ./bin/.hry/.bak/)functionlib.Z
Where the needir function creates a needed directory, returning the name.
Time to design, right here in this space, a "fix" for my daily wordcount moving average calculation. What I want to do is fill in those days on which I've not recorded any writing. It may have been recorded the following day, so there will be some blank dates in the log.
The idea is to have a table of blank dates, insert them into the daily record of "date, words written" pairs, and distribute the words on the following day between the two dates. Thinking about an /RDB handling of the problem, it seems my first challenge is to see if there's a way to substitute an awk END { calculation } into the compute function.
It's 08:24. Let's see how long it takes to discover the possibility.
This is interesting. I've modified compute to handle the possibility of an END, or any other awk blocks. Like this:
compute 'expr' # may now be
compute 'expr } END { more expr'
The change was possible because the previous internal handling:
BEGIN { FS=OFS=\"\t\"}; { $COLIN $1; $COLOUT print }"
which treats the user expression ($1) in the same awk block as the column-input assignments (COLIN) and the print delivery statements (COLOUT), may be rendered as two blocks without change to the program's behavior:
BEGIN { FS=OFS=\"\t\"}; { $COLIN $1 } { $COLOUT print }"
It's now 08:55 and we can say "case closed". The one underlying difficulty is the END block has to know the number (and maybe type) of the fields. Here's my simple test case:
tables |
compute 'nrecrds/=2 } END {
printf "%s\t%s\t%s\n", "", "", "Hello, World"
' 2>compute.err | tee tables.rdb | justify
which wrote "Hello, World" into the third field of the last record.
I suppose it's possible to insert other blocks in this fashion. I believe multiple BEGIN blocks are also acceptable to the awk program.
Here's a first cut at the new feature:
wc_fixdifsA ()
{
: todo: leaves out the other split record
rdb_cat wc_daily*.rdb | rd sort -r | compute '
ndt=i_date
ndw=difs
if(!difs) { i_date=ldt; difs=ldw/2 }
ldt=ndt
ldw=difs
' | rd sort -r | printfirst | rd sort
}
And its implementation, with a useful discovery: don't try to repair everything at once.
The solution to the problem is to have an ancillary file of dates, otherwise missing from the table of days and words written. And absorb those dates into the mostly filled in table, putting the result in a NXT file, rather than overwrite the RDB table.
Then manually review the results, using app_trace as a gate to keep the function from running to completion.
wc_okdifs ()
{
: after running fixdiffs, this one absorbs dailydiff.nxt;
: as the updated dailydiff.rdb, so as to avoid duplicating;
: records in dailymiss.rdb;
set -- $(shell_onlyfiles wc_daily{diff,miss}.{rdb,nxt});
: app_trace $*;
local nlines=$(cat wc_dailymiss.rdb | wc -l);
app_trace nlines: $nlines;
wc $*;
[[ $nlines -gt 2 ]] || return;
app_trace about to update $*;
backup $1;
mv $2 $1;
rdb_hdr $(sed 1q $1) > $3;
wc $(shell_onlyfiles $*)
}
wc_fixdifs ()
{
: blank entries in dailymiss.rdb fill in dates without written words;
: wordcounts are halved, successively if necessary into empty dates.;
app_trace $*;
local nlines=$(cat wc_dailymiss.rdb | wc -l);
app_trace nlines: $nlines;
[[ $nlines -gt 2 ]] || return;
rdb_cat wc_daily{miss,diff}.rdb | rd sort -r | ( read HEAD;
read DASH;
rdb_hdr $HEAD;
awk '
# { printf "\t\t\t%s\t%7d\n", $1, $2 }
NR > 1 {
ldifs = (($2)? ldifs: ldifs/2)
printf "%s\t%7d\n", ldate, ldifs
}
{
ldifs = (($2)? $2: ldifs)
ldate = $1
}
END { printf "%s\t%7d\n", ldate, ldifs }
' ) | rd sort | tee wc_dailydiff.nxt
}
With this wc_fixdifs, it was easier simply to write a special-purpose awk program, rather than use the END-insertion technique I discovered yesterday.
Why?
The hurdle in the compute function is that the record output is the later action: there is no way to save values after the printing is done. This function works since we print the last record, not the current record. You need to hold the prior record before printing, to see if you need to borrow some of its words.
The key insights here: ldifs needs first to be computed based on the present record; if there is an entry, use the Last Diffs (ldifs), and if not, divide the count in half. Then, after printing, save either the current data or the already-divided difference. This could have been done with compute, but the hoops didn't seem worth the effort.
Yesterday, during a meeting with Mike Silverstein, he showed me my AnnualHandling paper had a good chunk of duplicated lines, undoubtedly from my emacs OrgMode clumsiness. (I'll paste a good chunk of text to make sure I didn't lose some.) Early this a.m., now 6:49, I thought of a convenient way to discover any duplicated text. Here's the resulting function duplinesearch:
duplinesearch ()
{
cat $1 | awk '
NF == 0 { printf "%7d\n", NR; next }
!prt[$0]++ { printf "%7d\t%s\n", NR, $0; lnum[$0] = NR; next; }
{ printf "%7d\t%7d\n", NR, lnum[$0]; }
'
}
For every record, precede it with its line number. The magic in this exercise is my printfirst trick, !p[$N]++, where N is the awk field number; !p[..]++ says "not subsequent instances of field number N", or, in English, "only the first occurrence".
The awk next says "don't use any further patterns, read the next record".
The last rule prints both its line number and the line number where the matched (or duplicate) text first occurred. Long runs of a pair of numbers indicate where the text was duplicated. Occasional snippets of copied lines are also detected. It's easy to edit the original file with the output report in an adjacent window.
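Hand-worked on a hypothetical five-line file whose lines are "line one", "line two", a blank line, "line one" again, and "line three", the report would read:
      1	line one
      2	line two
      3
      4	      1
      5	line three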
Another you-learned-something-today day. Yesterday, I updated the local_run
function you see below, and realized it's not the top of the "info heap" in it's library. It's purpose is to standardize the name of a function who's job is to synthesize the top commands. Repeated use won't disturb any things you want to happen in the current directory, and even show off the steps.
So, whatever they are, the {addr,vcf}_digest
functions are important and may be run at will. From the next statement, we learn that their results are combined with
rdb_cat
. The use of key_reduce
, for instance, shows other possibilities, given multiple keys in the resulting table.
And lastly, the tables
command shows a synthesis of the tables in this directory.
Since this function collection is mostly about the keys in a database – a collection of tables – it seems appropriate to have a key_functions
. Recall the paradigmatic idiom for the smart function:
${*:-echo} …
where the generic echo
may be replaced by a more convenient default, in this case declare -f.
In all cases, the top of the heap must be the local _init
function, in this case key_init
. Note the final statement: key_init 1>&2
This is the one sort of allowable statement in a function library: any initialization functions are run as the last steps when a library is sourced into the current shell, with any standard output being directed onto the standard error.
local_run ()
{
addr_digest;
vcf_digest;
rdb_cat {addr,vcf}_digest.rdb | rd sort > digest_all.rdb;
key_reduce midunlavey;
tables | grep -v TbirdMac.rdb | tee tables.rdb | justify
}
add_record ()
{
report_notfile $1 && return 1;
set $1 ${1%.rdb}.new ${1%.rdb}.nxt;
( rdb_hdr $(sed 1q $1);
echo ) | tabletolist > $2;
${EDITOR:-emacs} $2;
rdb_cat $2 $1 > $3;
wc $*;
comment mv $3 $1
}
key_functions ()
{
${*:-declare -f} local_run add_record key_{functions,files,reduce,init}
}
key_files ()
{
${*:-echo} AddressBook.{csv,rdb} Companyand1120.{vcf,rdb} {deceased,occasion}.rdb {addr,vcf}_digest.rdb digest_all.rdb
}
key_reduce ()
{
: construct a reasonable combined record, from identical NAMEKEY records;
: date: 2019-08-26;
report_notargcount 1 $# use a NAMEKEY from digest_all.rdb;
key_combine $1 2> .key.err | listtotable | keyed_fields column | tee key_$1.rdb
}
key_init ()
{
key_functions;
declare -f fun_starter;
fun_starter ../bin/locallib
}
fun_starter ()
{
: hdr: -- PUBLIC ---------------------------------------- UTIL Functions --;
: from: fun_starter;
: date: 2018-02-08;
: date: 2018-02-12;
: date: 2018-02-16;
report_notfile $1 && return 1;
function initstmt ()
{
printf "%s 1>&2\n" $1
};
foreach initstmt $(functions $1 | grep _init$)
}
key_init 1>&2
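To see the declare -f default and its override in action (the second command's output is cut after three lines):
$ key_functions echo
local_run add_record key_functions key_files key_reduce key_init
$ key_functions | sed 3q
local_run ()
{
    addr_digest;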
The lesson here is each library, serving a functional area, needs a few standard functions:
Where the … will be a very few, frequently used functions that you need help remembering.
Now time to go and put ./bin and ../bin in my PATH.
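Which, for now, is no more than this one-liner; I'm assuming fix_path, used later under qfix, does something equivalent:
$ PATH=./bin:../bin:$PATH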
Today's function has taken me months to craft. I've mostly been hacking around it all this time.
The canonical format of an RDB Table is a TABLE.rdb
, and a history file of all the insertions and deletions, with the time of their recording, both insertion times and deletion times. An edited record is deleted and inserted at the same time. The old record is deleted; the new one inserted.
I've updated the three table files according to this table, where table is a generic name for the various tables:
state | current | old |
---|---|---|
table | table.rdb | table.rdb |
history | .hry/table.rdb | h.table.rdb |
compressed | .hry/table.rdb.Z | h.table.rdb.z |
The table name is unchanged; I've decided that all tables containing data worth recording are in a file with the rdb
suffix. In this sense, "recording" means those tables serving as a primary resource; i.e., joined tables needn't necessarily have their data recorded. There are exceptions. For instance, in a current application for the Men's Club (Stonebridge, Monroe Twp, NJ), our contact management system lists three email statuses, likely mandated by federal law. Besides "active" in the club, for which we have a single flag to detect it, the email status may be bouncing, Do Not Contact, or Not on Any List. These three may not overlap; an address is in no more than one of them. I download "all contacts", but it only reports address and custom fields. To gather the state, it's necessary to download the individual statuses. So a roll-up into a single table of the email address and status makes sense as the place to record the history.
I've anguished about one facet of the naming convention I've adopted. In the old method, the different states were distinguished in the same directory by a naming convention: an "h." prefix for history and a ".Z" suffix for the compressed file. The latter is the convention established by the operating system (*nix) tools compress and uncompress. I adopted the prefix convention based on precedent used in the source code versioning systems of the day, in particular SCCS ( s. and p. ) and RCS ( was it v. ? ).
I decided within the last two weeks to put all history in a local directory, named .hry/, along with my backup ( .bak/ ) and versioning ( .ver/ ) systems. The difference between table history and file history is that tables track the history of records, not entire files. So the history tree is about changes within the file, or table. In my old system the history record was distinguished from the data table by the h. prefix, and by the appearance of the two fields recording the history of a record: insert_time and delete_time. So table.rdb does not have those fields; .hry/table.rdb has the same fields as the table, in addition to the two time fields.
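For a concrete picture, using the daily-diffs table that appears later in this entry (the output is illustrative):
$ sed 1q wc_dailydiff.rdb
i_date	difs
$ sed 1q .hry/wc_dailydiff.rdb
insert_time	delete_time	i_date	difs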
A rule:
the .hry/table.rdb is NOT a backup of the ./table.rdb
Therefore:
the .hry/table.rdb shows the insertion time of every record,
and the deletion time of every former record of ./table.rdb
Now, the canonical process:
the current state of every table is the file table.rdb
and its companion compressed history, .hry/table.rdb.Z
This function, rdb_canon, drives intermediate states towards that objective. So, while it may be possible to remove the table and its history file, so long as the compressed history is available, the current table may be restored. It's also possible to manually update the history, and not its compressed version. For example, another application I'm making records the amount of my daily writing. I take advantage of the daily tabulations of the insert_time and delete_time for the files which have changed on any given day. As it turns out, there is a command which allows extracting the history on the command line without having to restore the history file. A picture may be of assistance:
compress .hry/table.rdb # compresses to .hry/table.rdb.Z and deletes the original
uncompress .hry/table.rdb.Z # restores .hry/table.rdb
while
zcat .hry/table.rdb.Z | column ... # produces the same effect as
cat .hry/table.rdb | column ... # without the need to restore
The pipe is one of *nix's remarkable features, making this a common practice.
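The command in question is zcat. For example, the current records can be pulled straight from the packed history, just as state 6 of rdb_canon (below) does; the quotes only guard the ! from interactive history expansion:
$ zcat .hry/table.rdb.Z | row '!delete_time' | ncolumn {insert,delete}_time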
rdb_canon ()
{
: default state of /RDB file: TABLE and PACKED HISTORY,;
: existing HISTORY is PACKED, {un}compress toggles the state;
: date: 2019-08-31;
local r=$1;
local h=.hry/$1;
local z=$h.Z;
[[ -s $h ]] || rm -f $h;
[[ -s $z ]] || rm -f $z;
while true; do
set -- $(newest_first $r $h $z | tr '\n' : ; echo);
: app_trace newest_first $r $h $z;
case $1 in
$z:*:$r | $z:$r*)
app_trace 1 $*;
rm -f $h;
return
;;
$r:$h*)
app_trace 2 $1;
h_update $r
;;
$r:$z*)
app_trace 3 $1;
uncompress -f $z;
touch $r
;;
$r*)
app_trace 4 $*;
rdb_hdr {insert,delete}_time $(sed 1q $r) > $h
;;
$h*)
app_trace 5 $1;
compress -f $h
;;
$z*)
app_trace 6 $*;
zcat $z | row !delete_time | ncolumn {insert,delete}_time > $r
;;
*)
app_trace 7 $*;
return
;;
esac;
done
}
In order:
The table, history, and compressed file names are assigned to r, h, and z.
The history and compressed files are removed if empty. (Actions depend on non-empty versions of these files.)
The 1st positional parameter is set to a string of the files, via newest_first, ordered from newest to oldest; a sketch of newest_first follows the state-machine notes below. Usually, but not necessarily, either the history or the compressed history is absent.
Separate cases distinguish:
compressed history is the newest file, where the history may be present or not
$z:*:$r | $z:$r*
the table is newer than the history file, regardless of the state of the compressed file
$r:$h*
the table is newer than the compressed file, regardless of the state of the history file
$r:$z*
the table exists, apparently the history and compressed files do not
$r*
the history exists, the table and compressed files do not
$h*
the compressed history exists; the table and history files do not.
$z*
we've overlooked a possibility, or the argument is not a file
*
Think of this as a state machine. The activity for a single file is deterministic. The default state is the first. If nothing has changed, the function returns.
The state machine operates as follows:
Note, a new table starts at state 4; there is no internal transition to state 4. From there, the states are 2, 5, 1.
State 6 is entered when the table has been lost or deleted; it is then the starting state, followed by 3, 2, 5, 1.
In state 5, the decision to go to state 1 or 3 depends on the modification time of the history file. The nice thing about the compress <-> uncompress pair is that in either operation the created file adopts the modification time of the generating file. A nice touch, if ever there were one.
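For completeness, newest_first isn't listed in this entry. A minimal stand-in, assuming it simply reports whichever of its arguments exist, newest modification time first:
newest_first ()
{
    : EXISTING arguments, newest modification time first;
    : a sketch only, the working version may differ;
    ls -td "$@" 2> /dev/null
}
Since ls writes one name per line when piped, the tr '\n' : in rdb_canon turns the result into the colon-joined string the case patterns test.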
For fun, here are the tables in the wordcount directory:
table | nrecrds | fields |
---|---|---|
foo.rdb | 6 | bar zot gum |
tables.rdb | 1 | table nrecrds fields |
test.rdb | 0 | first 2nd |
wc_dailydiff.rdb | 88 | i_date difs |
wc_dailymiss.rdb | 0 | i_date difs |
wordcount.rdb | 4462 | lines words chars file |
And one last item, while we're at it: a pair of functions, fixlib
and locallib
, both of which locate files with their name:
fixlib ()
{
: date: 2019-08-31;
find $(home) -name $(myname) | usefulfiles
}
locallib ()
{
: date: 2019-08-31;
find $(home) -name $(myname) | usefulfiles
}
With only one comment: the generic find_me, untested:
find_me ()
{
: date: 2019-08-31;
find $(home) -name $(myname 2) | usefulfiles
}
orgheader.org () { find_me; }
without further comment. The possibilities are endless.
Possibly getting carried away, but here's a layer on top:
$ nprov "findme 'f*.m4'" "declare -f nprov findme f*.m4" "f*.m4"
$ findme f*.m4
$ declare -f nprov findme f*.m4
nprov ()
{
: date: 2019-07-01;
app_trace $# $@;
echo "$ $1";
eval "$1";
[[ $# -gt 1 ]] && {
shift;
$(myname) "$@"
}
}
findme ()
{
report_notargcount 1 $#;
set -- "$1" $(fuse find_me | nf eq 2 | tail -1);
app_trace $@;
declare -f $2 | sed "s/$2/$1/g" > .find.sh;
. ./.find.sh
}
f*.m4 ()
{
find_me
}
$ f*.m4
/Users/martymcgowan/Dropbox/include/funclib.m4
/Users/martymcgowan/Dropbox/include/filehndl.m4
/Users/martymcgowan/Dropbox/m4site/field_struct.m4
/Users/martymcgowan/Dropbox/m4site/html/field_struct.m4
/Users/martymcgowan/Dropbox/src/ittoolbox/factorExercise/factorAScript.m4
This all relies (fuse find_me) on first having defined a find_me manually. The nprov function is thrown in for good measure; it captured all these steps with the last command. The commands, in sequence, are the three shown above.
The secret sauce, in case you haven't figured it out, is to 'nprov' the whole nprov … thusly:
$ nprov '''nprov ....''' | tee ~/.x
where I picked up the results. This took a bit of experimenting to nest everything properly. The only other manual intervention was to put back the single quotes around the "f*.m4" argument in the output.
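As a standalone illustration, nprov simply narrates and runs each argument in turn; a toy run, assuming app_trace stays quiet and myname resolves to nprov:
$ nprov 'echo one' 'echo two'
$ echo one
one
$ echo two
two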
The big insight over the weekend: that with rdb_canon, it is possible to add a field to the database, and have the history absorb this, if done with care. You might even say that making an accurate record of the history, including field name changes, additions, and deletions, is part of the requirements.
So, what is the trick? Since rdb_canon is a state machine, depending on which of the three instances of the table are available, the place to absorb the requirement is wherever the history file is being updated: make sure it recognizes the current list of fields in the table itself.
The first thing I realize is "done with care" requires more thinking.
Any history has to be unpacked, then the fields installed in a way that doesn't cause every record to be changed.
So, back to the drawing board, and take care of some errands before getting more stuck.
It's coming into view: the Holy Grail. Starting with a FAPI lib – the Function's API – through the good graces of OrgMode, its focus now is a GRAF lib.
My Dropbox/lib/graf now hosts a growing handful of OrgMode files outlining the function call graph of a collection of functions.
The sfg
function collects their names; graf_fun
then does the heavy lifting:
$ graf_fun graf.out $(sfg graf) # or,
$ graf_fun apps.out $(sfg app) # ...
Here's graf_fun
, with plenty of app_trace
:
graf_fun ()
{
: produces the call graph for a list of functions, the MAIN program;
: an optional first argument NAME.out gives the HANDLE of the output files;
: the list is collected by app_fun fun ... tee .fun.app;
:;
: date: 2019-09-03;
function fcg ()
{
printf "#+OPTIONS: ^:nil\n\n";
fun_callgraf $(< $1) 2> $2 | sed '
# OrgMode link to ../fapi/src/FUNCTION
s/\([a-zA-Z0-9-][a-zA-Z0-9_-]*\)/[[..\/fapi\/src\/\1][\1]]/
'
};
report_notargcount 1 $# "[ handle.out ] function ..." && return 1;
local handle=$1;
[[ $1 = ${1%.out}.out ]] && {
handle=${1%.out};
shift
};
report_notfunction $1 && return 1;
local file=$(graf_lib)/$handle.org;
local funs=${file%.org}.fun;
local fuse=${file%.org}.use;
local erro=${file%.org}.err;
app_trace handle: $handle, ARGS ${*:2:12};
app_fun $* | tee $funs | grep -v ^_ > $fuse;
app_trace $(wc -l $funs $fuse);
fcg $fuse $erro > $file;
app_trace $(wc -l $erro $file);
comment $file $funs
}
app_fmgraf ()
{
runfrom $(commonplace)/lib $* || return;
set -- $1 graf/$1.fun $(needir $(commonplace)/bin)/${1}_app;
indir fapi/src cat $(< $2 ) | tee $3
}
And, at the moment, a hook to the graf.org. A cursory test of the app's functions standing alone says the resulting app is self-sufficient.
$ graf_fun graf.out $(sfg graf) # writes the graf.org
$ declare -f $(app_fun app_fmgraf) > .x # saves the minimal
$ unset $(sfg) # clears all functions
$ source .x # reloads the creator
$ graf_fun ... # reproduces the result ...
Here's a quick fix: qfix, followed by usage examples. The challenge was to change some function names to more meaningful names, thus:
former name | updated name | what it really means |
backup_dir | backup_allfiles | backup all the files in the directory |
dir_backup | backup_directory | an environmentally dependent name |
dir_version | version_directory | … |
The central function backup_dirver
is called by any of the backup family to assign the names of the backup and version tree directories. The default for my own work takes advantage of the hidden-name feature of Unix and Unix-like operating systems, namely .bak
and .ver
. For Windows file systems, back
and version
are the corresponding names. To claim these names, a user needs to set an environment variable: USER_BACKUP_AREA
.
backup_dirver ()
{
: default BACKUP is .bak ... directory;
: default VERSION is .ver directory;
: to use NON-DOT directory names, set USER_BACKUP_AREA;
: to place in the hierarchy like "/Windows/Fileshare";
: start by clearing any default;
: date: 2019-11-16;
:;
: ${USER_BACKUP_AREA:=/dev/null};
set .bak .ver back version;
case $PWD in
*/WantaNonDotName* | *${USER_BACKUP_AREA}*)
: shift off the defaults;
shift 2
;;
esac;
setenv DIR_BACKUP $1 > /dev/null;
setenv DIR_VERSION $2 > /dev/null;
echo $*
}
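A quick check of the defaults in an ordinary (dot-name) directory; setenv is presumably the helper that exports and echoes the variable, hence the redirect to /dev/null:
$ backup_dirver
.bak .ver back version
$ echo $DIR_BACKUP $DIR_VERSION
.bak .ver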
qfix ()
{
: quick fix a list of functions, defaulting to the local FIXLIB;
: date: 2019-11-16;
: date: 2019-12-08;
set -- $(functions fixlib 2>/dev/null) $*;
: ------------------------ put DOT, the local directory on the PATH --;
fix_path;
app_trace WHICH $(which fixlib);
report_notargcount 1 $# $(myname) function ... && return 1;
: ----------------------------------------- each FUNCTION, only ONCE --;
qf $(args_uniq $*) | tee fixlib;
emacs fixlib;
. ./fixlib;
: --------------------------------------------- save a local BACKUP --;
backup_lib ./fixlib;
set -- $(functions ./fixlib);
declare -f $*;
comment If Satisfied, RUN fix_clean
}
The great discovery here: the function qfix collects functions in a local library, fixlib
. Named functions are appended to the library, which is then edited, sourced, and backed up.
Note, in the following sequence, the dir_backup and dir_version functions are added and edited. The functions' names may be changed in the fixlib
file, since existing copies of the functions remain. The big breakthrough comes in the use of fuse. The result of the command puts the functions which use dir_backup and dir_version on the command line, and thus appends them to the fixlib. It's easy for an editor to make all the needed changes in one place. This is preferred over a stream edit; there may be occasions which don't require changes. In my case, a tag may want to refer to the prior function name.
$ qfix backup_dir dir_{backup,version}
$ qfix backup_dir
$ qfix $(fuse dir_backup | field 1 | sort -u)
$ qfix $(fuse dir_version | field 1 | sort -u)
$ set swdiary-20{19,20}.org; [[ ! -f $2 ]] && ( sed 4q $1; grep '^* ' $1 | grep 2019) | sed 's/2019/2020/' | tee $2