Merge branch 'master' of git+ssh://github.com/adefaria/clearscm

author Andrew DeFaria <A.DeFaria@cpanel.net>

Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)

committer Andrew DeFaria <A.DeFaria@cpanel.net>

Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)
author Andrew DeFaria <A.DeFaria@cpanel.net>
Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)
committer Andrew DeFaria <A.DeFaria@cpanel.net>
Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)
diff --git a/bin/simple_google_tts b/bin/simple_google_tts

new file mode 100755 (executable)

index 0000000..8dc3e98
--- /dev/null
+++ b/bin/simple_google_tts
@@ -0,0 +1,327 @@
+#!/bin/bash
+
+# NAME:         Simple Google TTS
+# VERSION:      0.1
+# AUTHOR:       (c) 2014 - 2016 Glutanimate <https://github.com/Glutanimate/>
+# DESCRIPTION:  Wrapper script for Michal Fapso's speak.pl Google TTS script
+# DEPENDENCIES: - wrapper: xsel libttspico0 libttspico-utils libttspico-data libnotify-bin
+#               - speak.pl: libwww-perl libwww-mechanize-perl libhtml-tree-perl sox libsox-fmt-mp3
+#
+# LICENSE:      GNU GPLv3 (http://www.gnu.de/documents/gpl-3.0.en.html)
+#
+# NOTICE:       THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
+#               EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 
+#               PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR 
+#               IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
+#               AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND 
+#               PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE,
+#               YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+#
+#               IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY 
+#               COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS 
+#               PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, 
+#               INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE 
+#               THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED 
+#               INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE 
+#               PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER 
+#               PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+#
+# USAGE:        simple_google_tts [-p|-g|-h] languagecode ['strings'|'file.txt']
+#
+#               please consult the README or the help output (-h) for more information
+
+############# GLOBVAR/PREP ###############
+
+# Absolute, symlink-resolved path of this script; speak.pl is looked up in
+# the same directory.
+ScriptPath="$(readlink -f "$0")"
+# NOTE(review): ScriptBase is not referenced anywhere else in this script.
+ScriptBase="$(basename "$0")"
+ParentPath="${ScriptPath%/*}"
+speakpl="$ParentPath/speak.pl"
+
+# PID of this shell. It is written to PidFile so that a later invocation can
+# terminate this one, and cleanup() uses it to kill this shell's children.
+TOP_PID="$$"
+PidFile="/tmp/${0##*/}.pid"
+
+
+############### SETTINGS #################
+
+# Command used to play the synthesized audio files.
+Player="play"
+
+##############  DIALOGS  #################
+
+# Help text printed for -h, for unknown options and for invalid arguments.
+Usage="\
+$(basename "$0") [-p|-g|-h] languagecode ['strings'|'file.txt']
+
+    -p:   use offline TTS (pico2wave) instead of Google's TTS system
+    -g:   activate gui notifications (via notify-send)
+    -h:   display this help section
+
+    Selection of valid language codes: en, es, de...
+    Check speak.pl for a list of all valid codes
+
+    Warning: offline TTS only supports en, de, es, fr, it
+
+    If an instance of the script is already running it will be terminated.
+
+    If you don't provide an input string or input file, $(basename "$0")
+    will read from the X selection (current/last highlighted text)\
+"
+
+# Icon and title passed to notify-send by notify().
+GuiIcon="orca"
+GuiTitle="Google TTS script"
+
+# User-facing status and error messages.
+MsgErrNoSpeakpl="Error: speak.pl not found. Falling back to offline playback."
+MsgErrDeps="Error: missing dependencies. Couldn't find:"
+MsgInfoExistInstance="Aborting synthesis and playback of existing script instance"
+MsgErrNoLang="Error: No language code provided."
+MsgInfoInpXsel="Reading from X selection."
+MsgInfoInpFile="Reading from text file."
+MsgInfoInpString="Reading from string."
+MsgErrInvalidInput="Error: Invalid input (file might not be a text file)."
+MsgInfoConnOff="No internet connection."
+MsgInfoModePico="Using pico2wave for TTS synthesis."
+MsgInfoModeGoogle="Using Google for TTS synthesis."
+# The backslash-newline joins both source lines into one sentence, so the
+# first line has to end with a space (it used to end with a stray ".").
+MsgErrInvalidLang="Error: Offline TTS via pico2wave only supports the \
+following languages: en, de, es, fr, it."
+MsgErrInputEmpty="Error: Input empty."
+MsgInfoSynthesize="Synthesizing virtual speech."
+MsgInfoPlayback="Playing synthesized speech"
+MsgInfoSectionEmpty="Skipping empty paragraph"
+MsgInfoDone="All sections processed. Waiting for playback to finish."
+
+############## FUNCTIONS #################
+
+# Record every command named in "$@" that this shell cannot resolve.
+# Each missing name is appended, prefixed with a space, to the global
+# MissingDeps string. Nothing is printed.
+check_deps () {
+    for i in "$@"; do
+      if ! type "$i" > /dev/null 2>&1; then
+        MissingDeps="${MissingDeps} $i"
+      fi
+    done
+}
+
+# Make sure the script can run at all.
+# Switches to offline mode (OptOffline=1) when speak.pl is missing and online
+# mode was requested, then exits with status 1 if sox or perl is unavailable.
+check_environment () {
+    if [[ "$OptOffline" != "1" ]] && [[ ! -f "$speakpl" ]]; then
+      notify "$MsgErrNoSpeakpl"
+      OptOffline="1"
+    fi
+
+    check_deps sox perl
+    # all dependencies found: nothing more to do
+    [[ -z "$MissingDeps" ]] && return
+    notify "${MsgErrDeps}${MissingDeps}"
+    exit 1
+}
+
+# Terminate a previously started instance of this script, if there is one.
+# Reads the PID stored in $PidFile, removes the file, sends SIGTERM to that
+# process and then waits up to about five seconds for it to disappear.
+# NOTE(review): a stale PID file may name an unrelated process that reused
+# the PID; this function cannot tell the difference.
+check_existing_instance(){
+  ExistingPID="$(cat "$PidFile" 2> /dev/null)"
+  [[ -n "$ExistingPID" ]] || return 0
+  rm -f "$PidFile"
+  # never hand anything but a plain process id to kill
+  [[ "$ExistingPID" =~ ^[0-9]+$ ]] || return 0
+  notify "$MsgInfoExistInstance"
+  kill -s TERM "$ExistingPID" 2> /dev/null || return 0
+  # The other instance is not a child of this shell, so the `wait` builtin
+  # cannot be used on it; poll with `kill -0` until it is gone instead.
+  local Tries=0
+  while kill -0 "$ExistingPID" 2> /dev/null && (( Tries < 50 )); do
+    sleep 0.1
+    Tries=$((Tries + 1))
+  done
+}
+
+# Parse the leading command line switches with getopts.
+#   -g  sets OptNotify=1   (desktop notifications)
+#   -p  sets OptOffline=1  (offline synthesis)
+#   -h  prints the usage text and exits 0
+# An unknown switch prints the usage text and exits 1. OPTIND is left
+# pointing at the first positional argument for the caller to shift.
+arg_evaluate_options(){
+    while getopts "gph" Options; do
+      case "$Options" in
+        g ) OptNotify="1" ;;
+        p ) OptOffline="1" ;;
+        h ) echo "$Usage"; exit 0 ;;
+       \? ) echo "$Usage"; exit 1 ;;
+      esac
+    done
+}
+
+# Classify the positional arguments (options already shifted away) and set
+# the globals LangCode, Input and InputMode.
+#   $1 - language code (required)
+#   $2 - optional text string or path to a text file; when omitted the
+#        X selection is used
+# Exits 1 with the usage text when the language code is missing, when $2 is
+# an existing file that `file` does not report as text, or when more than
+# two arguments are given.
+arg_check_input(){
+  case $# in
+    0 )
+      echo "$MsgErrNoLang"
+      echo "$Usage"
+      exit 1
+      ;;
+    1 )
+      echo "$MsgInfoInpXsel"
+      InputMode="xsel"
+      ;;
+    2 )
+      if [[ ! -f "$2" ]]; then
+        echo "$MsgInfoInpString"
+        InputMode="string"
+      elif file --mime-type -b "$2" | grep -q text; then
+        echo "$MsgInfoInpFile"
+        InputMode="file"
+      else
+        echo "$MsgErrInvalidInput"
+        echo "$Usage"
+        exit 1
+      fi
+      ;;
+    * )
+      # Extra arguments used to fall through with InputMode unset and only
+      # failed later with a misleading "Input empty" error.
+      echo "Error: Too many arguments."
+      echo "$Usage"
+      exit 1
+      ;;
+  esac
+  LangCode="$1"
+  Input="$2"
+}
+
+# Print the message given as $1 on stdout and, when GUI notifications were
+# requested (OptNotify=1), also show it through notify-send.
+notify(){
+  local Message="$1"
+  echo "$Message"
+  [[ "$OptNotify" != "1" ]] && return
+  notify-send -i "$GuiIcon" "$GuiTitle" "$Message"
+}
+
+# Switch to offline mode (OptOffline=1) when the default gateway does not
+# answer a single ping within one second.
+# NOTE(review): this only probes the gateway reported by `ip r`, not an
+# Internet host.
+check_connectivity(){
+  local Gateway
+  Gateway="$(ip r | grep default | cut -d ' ' -f 3)"
+  if ping -q -w 1 -c 1 "$Gateway" > /dev/null; then
+    return
+  fi
+  echo "$MsgInfoConnOff"
+  OptOffline="1"
+}
+
+# Select the synthesis backend from OptOffline.
+# Sets tts_engine to the name of the function to run (tts_pico or
+# tts_google) and OutFile to the matching default output file name.
+set_tts_mode(){
+  case "$OptOffline" in
+    1 )
+      echo "$MsgInfoModePico"
+      tts_engine="tts_pico"
+      OutFile="out.wav"
+      ;;
+    * )
+      echo "$MsgInfoModeGoogle"
+      tts_engine="tts_google"
+      OutFile="out.mp3"
+      ;;
+  esac
+}
+
+# Load the text to speak into InputText according to InputMode:
+#   xsel   - current X selection
+#   string - the literal Input argument
+#   file   - contents of the file named by Input
+# Exits 1 (after notifying) when the resulting text is empty or blank.
+set_input_mode(){
+  case "$InputMode" in
+    xsel )   InputText="$(xsel)" ;;
+    string ) InputText="$Input" ;;
+    file )   InputText="$(cat "$Input")" ;;
+  esac
+
+  # strip all whitespace; if nothing is left there is nothing to speak
+  local Stripped="${InputText//[[:space:]]/}"
+  if [[ -z "$Stripped" ]]; then
+    notify "$MsgErrInputEmpty"
+    exit 1
+  fi
+}
+
+# Split InputText into sections and store them in the TextSections array.
+# Sections is set to the number of real sections; TextSections holds one
+# extra trailing element that callers use as the end marker.
+split_into_paragraphs(){
+  # Newlines aren't reliable indicators of paragraph breaks
+  # (e.g.: PDF files where each line ends with a newline).
+  # Instead we look for lines ending with a full stop and divide
+  # our text input into sections based on that
+
+  # printf is used instead of echo so that text starting with an echo option
+  # (e.g. "-n") is passed through unchanged
+  InputTextModded="$(printf '%s\n' "$InputText" | \
+    sed 's/\.$/|/g' | sed 's/^\s*$/|/g' | tr '\n' ' ' | tr '|' '\n')"
+
+  #   - first sed command: replace end-of-line full stops with '|' delimiter
+  #   - second sed command: replace empty lines with same delimiter (e.g.
+  #     to separate text headings from text)
+  #   - subsequent tr commands: remove existing newlines; replace delimiter with
+  #     newlines to prepare for readarray
+  # TODO: find a more elegant and secure way to split the text by
+  # multi-character/regex patterns
+
+  # insert trailing newline to allow for short text fragments.
+  # `echo -e` must not be used here: it would interpret backslash sequences
+  # such as \n or \t that are part of the user's text.
+  readarray TextSections < <(printf '%s\n\n' "$InputTextModded")
+
+  # subtract one section because of trailing newline
+  Sections="$((${#TextSections[@]} - 1))"
+
+  # TODO: find a more elegant way to handle short inputs
+}
+
+# Synthesize the text given as $1 with pico2wave, writing the audio to
+# $OutFile using the language in $LangCode.
+pico_synth(){
+  local Text="$1"
+  pico2wave --wave="$OutFile" --lang="$LangCode" "$Text"
+}
+
+# Synthesize the text given as $1 with speak.pl, writing the audio to
+# $OutFile. The text is handed over through a process substitution, which
+# speak.pl opens like a regular input file; all of its output is discarded.
+speakpl_synth(){
+  local Text="$1"
+  "$speakpl" "$LangCode" <(echo "$Text") "$OutFile" > /dev/null 2>&1
+}
+
+# Speak InputText through speak.pl (Google TTS), one section at a time.
+# Each non-empty section is synthesized into out_<index>.mp3 and played
+# before the next one is processed. The previous section's file is removed
+# once the next one has been synthesized.
+tts_google(){
+  split_into_paragraphs
+  for i in "${!TextSections[@]}"; do
+    # the element at index $Sections is only the trailing end marker
+    if [[ "$i" = "$Sections" ]]; then
+      echo "$MsgInfoDone"
+      break
+    fi
+    echo "Processing $((i+1)) out of $Sections paragraphs"
+    OutFile="out_$i.mp3"
+    SectionText="${TextSections[$i]}"
+    if [[ -z "${SectionText//[[:space:]]/}" ]]; then
+      echo "$MsgInfoSectionEmpty"
+      continue
+    fi
+    speakpl_synth "$SectionText"
+    [[ -f "out_$((i-1)).mp3" ]] && rm "out_$((i-1)).mp3"
+    echo "$MsgInfoPlayback $((i+1))"
+    echo "Playing $OutFile"
+    # Playback runs in the foreground, so there is no player PID to record
+    # or wait for: "$!" is not set by a foreground command and used to pick
+    # up an unrelated process id here.
+    $Player "$OutFile"
+  done
+}
+
+# Speak InputText offline with pico2wave.
+# Maps the two-letter LangCode to the voice name handed to pico2wave and
+# exits 1 for any other language, then synthesizes the whole text into
+# out.wav and plays it.
+tts_pico(){
+  case "$LangCode" in
+    en ) LangCode="en-GB" ;;
+    de ) LangCode="de-DE" ;;
+    es ) LangCode="es-ES" ;;
+    fr ) LangCode="fr-FR" ;;
+    it ) LangCode="it-IT" ;;
+    * )
+      echo "$MsgErrInvalidLang"
+      exit 1
+      ;;
+  esac
+  OutFile="out.wav"
+  # pico2wave handles long text inputs and
+  # fixed formatting line-breaks well enough on its own.
+  # no need to use split_into_paragraphs()
+  pico_synth "$InputText"
+  echo "$MsgInfoPlayback"
+  $Player "$OutFile" > /dev/null 2>&1
+}
+
+# Exit handler: kill the child processes of this script, then remove the
+# temporary working directory and the PID file if they exist.
+cleanup(){
+  pkill -P "$TOP_PID"
+  if [[ -n "$TmpDir" && -d "$TmpDir" ]]; then
+    rm -r "$TmpDir"
+  fi
+  [[ -n "$PidFile" ]] && [[ -f "$PidFile" ]] && rm "$PidFile"
+}
+
+############# INSTANCECHECK ##############
+
+check_existing_instance
+
+############## USGCHECKS #################
+
+arg_evaluate_options "$@"
+shift $((OPTIND-1))
+check_environment
+arg_check_input "$@"
+check_connectivity
+
+############### PREPWORK ##################
+
+# Install the exit handler before creating anything it has to remove, so an
+# early failure cannot leave the PID file or the temp directory behind.
+trap "cleanup; exit" EXIT
+
+echo "$TOP_PID" > "$PidFile"
+
+# Read the input while still in the caller's directory; after the cd below
+# a relative input file path would no longer resolve.
+set_input_mode
+
+# All audio files are written to (and deleted from) the current directory,
+# so never continue outside of a private temp directory.
+TmpDir="$(mktemp -d "/tmp/${0##*/}.XXXXXX")" || exit 1
+cd "$TmpDir" || exit 1
+
+################ MAIN ####################
+
+set_tts_mode
+notify "$MsgInfoSynthesize"
+"$tts_engine"
diff --git a/bin/speak b/bin/speak

index 7b54945..97aceb1 100755 (executable)
--- a/bin/speak
+++ b/bin/speak
@@ -100,4 +100,4 @@ if ($opts{clipboard}) {
    } # if
  } # if
  
-speak quotemeta $msg;
+speak $msg;
diff --git a/bin/speak.pl b/bin/speak.pl

new file mode 100755 (executable)

index 0000000..3a9106d
--- /dev/null
+++ b/bin/speak.pl
@@ -0,0 +1,443 @@
+#!/usr/bin/perl
+
+#--------------------------------------------------
+#
+# Copyright 2012 Michal Fapso (https://github.com/michalfapso)
+# 
+# Modified by Glutanimate (https://github.com/glutanimate)
+#
+# Usage:
+# ./speak.pl en input.txt output.mp3
+#
+# Prerequisites:
+# sudo apt-get install libwww-perl libwww-mechanize-perl libhtml-tree-perl sox libsox-fmt-mp3
+#
+# Compiling sox:
+# Older versions of sox package might not have the support for mp3 codec,
+# so just download sox from http://sox.sourceforge.net/
+# install packages libmp3lame-dev libmad0-dev
+# and compile sox
+#
+# List of language code names for Google TTS:
+#  af  Afrikaans
+#  sq  Albanian
+#  am  Amharic
+#  ar  Arabic
+#  hy  Armenian
+#  az  Azerbaijani
+#  eu  Basque
+#  be  Belarusian
+#  bn  Bengali
+#  bh  Bihari
+#  bs  Bosnian
+#  br  Breton
+#  bg  Bulgarian
+#  km  Cambodian
+#  ca  Catalan
+#  zh-CN  Chinese (Simplified)
+#  zh-TW  Chinese (Traditional)
+#  co  Corsican
+#  hr  Croatian
+#  cs  Czech
+#  da  Danish
+#  nl  Dutch
+#  en  English
+#  eo  Esperanto
+#  et  Estonian
+#  fo  Faroese
+#  tl  Filipino
+#  fi  Finnish
+#  fr  French
+#  fy  Frisian
+#  gl  Galician
+#  ka  Georgian
+#  de  German
+#  el  Greek
+#  gn  Guarani
+#  gu  Gujarati
+#  ha  Hausa
+#  iw  Hebrew
+#  hi  Hindi
+#  hu  Hungarian
+#  is  Icelandic
+#  id  Indonesian
+#  ia  Interlingua
+#  ga  Irish
+#  it  Italian
+#  ja  Japanese
+#  jw  Javanese
+#  kn  Kannada
+#  kk  Kazakh
+#  rw  Kinyarwanda
+#  rn  Kirundi
+#  ko  Korean
+#  ku  Kurdish
+#  ky  Kyrgyz
+#  lo  Laothian
+#  la  Latin
+#  lv  Latvian
+#  ln  Lingala
+#  lt  Lithuanian
+#  mk  Macedonian
+#  mg  Malagasy
+#  ms  Malay
+#  ml  Malayalam
+#  mt  Maltese
+#  mi  Maori
+#  mr  Marathi
+#  mo  Moldavian
+#  mn  Mongolian
+#  sr-ME  Montenegrin
+#  ne  Nepali
+#  no  Norwegian
+#  nn  Norwegian (Nynorsk)
+#  oc  Occitan
+#  or  Oriya
+#  om  Oromo
+#  ps  Pashto
+#  fa  Persian
+#  pl  Polish
+#  pt-BR  Portuguese (Brazil)
+#  pt-PT  Portuguese (Portugal)
+#  pa  Punjabi
+#  qu  Quechua
+#  ro  Romanian
+#  rm  Romansh
+#  ru  Russian
+#  gd  Scots Gaelic
+#  sr  Serbian
+#  sh  Serbo-Croatian
+#  st  Sesotho
+#  sn  Shona
+#  sd  Sindhi
+#  si  Sinhalese
+#  sk  Slovak
+#  sl  Slovenian
+#  so  Somali
+#  es  Spanish
+#  su  Sundanese
+#  sw  Swahili
+#  sv  Swedish
+#  tg  Tajik
+#  ta  Tamil
+#  tt  Tatar
+#  te  Telugu
+#  th  Thai
+#  ti  Tigrinya
+#  to  Tonga
+#  tr  Turkish
+#  tk  Turkmen
+#  tw  Twi
+#  ug  Uighur
+#  uk  Ukrainian
+#  ur  Urdu
+#  uz  Uzbek
+#  vi  Vietnamese
+#  cy  Welsh
+#  xh  Xhosa
+#  yi  Yiddish
+#  yo  Yoruba
+#  zu  Zulu 
+#--------------------------------------------------
+
+use strict;
+
+use File::Path qw( rmtree );
+use HTTP::Cookies;
+use WWW::Mechanize;
+use LWP;
+use HTML::TreeBuilder;
+use Data::Dumper;
+$Data::Dumper::Maxdepth = 2;
+
+# The script needs exactly three arguments: language code, input text file
+# and output mp3 path. Anything else prints the usage and fails.
+if (scalar(@ARGV) != 3) {
+  print STDERR "Usage: $0 LANGUAGE IN.txt OUT.mp3\n";
+  print STDERR "\n";
+  print STDERR "Examples: \n";
+  print STDERR "    $0 en input.txt speech.mp3\n";
+  print STDERR "    echo \"Hello world\" | $0 en /dev/stdin speech.mp3\n";
+  exit 1;
+}
+
+# Command line arguments.
+my $language = $ARGV[0]; # sk | en | cs | ...
+my $textfile_in = $ARGV[1];
+my $all_mp3_out = $ARGV[2];
+
+my $SENTENCE_MAX_CHARACTERS = 100; # limit for google tts
+# Intermediate audio files are written here; removed again after joining.
+my $TMP_DIR = "$all_mp3_out.tmp";
+# NOTE(review): $RECAPTCHA_URL and $SYSTEM_WEBBROWSER are not referenced
+# anywhere else in this script.
+my $RECAPTCHA_URL = "http://www.google.com/sorry/?continue=http%3A%2F%2Ftranslate.google.com%2Ftranslate_tts%3Ftl=en%26q=Your+identity+was+successfuly+confirmed.";
+my $RECAPTCHA_SLEEP_SECONDS = 60;
+my $SYSTEM_WEBBROWSER = "firefox";
+# JoinMp3s() joins in chunks once the number of files reaches this limit.
+my $MAX_OPENED_FILES = 1000;
+# A leftover directory from an aborted run is reused; any other failure to
+# create it is fatal because every later step writes into it.
+if (!-d $TMP_DIR) {
+  mkdir($TMP_DIR) or die("ERROR: Can not create temporary directory '$TMP_DIR': $!");
+}
+
+# Lengths of the pauses inserted between chunks. Each value is passed as the
+# duration argument to SilenceToMp3() (presumably seconds, as interpreted by
+# sox `trim` - confirm).
+my $silence_duration_paragraphs = 0.8;
+my $silence_duration_sentences  = 0.2;
+my $silence_duration_comma      = 0.1;
+my $silence_duration_brace      = 0.1;
+my $silence_duration_semicolon  = 0.2;
+my $silence_duration_words      = 0.05;
+
+# NOTE(review): @headers is not referenced anywhere else in this script.
+my @headers = (
+'Host' => 'translate.google.com',
+'User-Agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
+'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+'Accept-Language' => 'en-us,en;q=0.5',
+'Accept-Encoding' => 'gzip,deflate',
+'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+'Keep-Alive' => '300',
+'Connection' => 'keep-alive',
+);
+
+my $cookie_jar = HTTP::Cookies->new(hide_cookie2 => 1);
+
+# Client used by GetSentenceResponse_CaptchaAware(); autocheck is off, so
+# failed requests do not die on their own.
+my $mech = WWW::Mechanize->new(autocheck => 0, cookie_jar => $cookie_jar);
+$mech->agent_alias( 'Windows IE 6' );
+$mech->add_header( "Connection" => "keep-alive" );
+$mech->add_header( "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+$mech->add_header( "Accept-Language" => "en-us;q=0.5,en;q=0.3");
+
+# Plain LWP client used by GetSentenceResponse().
+my $browser = LWP::UserAgent->new;
+
+# URL of the previous TTS request; GetSentenceResponse_CaptchaAware() sends
+# it as the Referer header.
+my $referer = "";
+
+# Ordered list of audio files that are joined into the final output.
+my @all_mp3s = ();
+# Running counter used to number the temporary audio files.
+my $sentence_idx = 0;
+my $tts_requests_counter = 0;
+# Sample rate of the first synthesized chunk; 0 until one has been measured.
+my $sample_rate = 0;
+# Main loop: read the input text line by line, cut every line into chunks of
+# fewer than $SENTENCE_MAX_CHARACTERS characters at punctuation or word
+# boundaries, synthesize each chunk, and finally join all chunks and the
+# silences between them into $all_mp3_out.
+# For each input line
+open(my $in_fh, '<', $textfile_in) or die("ERROR: Can not open file '$textfile_in': $!");
+while (my $line = <$in_fh>)
+{
+  chomp($line);
+  print "line: $line\n";
+  # Check for empty lines - paragraphs separator
+  if ($line =~ /^\s*$/) {
+    if ($sample_rate != 0) {
+      push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration_paragraphs, $sample_rate);
+    }
+  } else {
+    my @words = split(/\s+/, $line);
+    my $sentence = "";
+    # For each word
+    for (my $i=0; $i<scalar(@words); $i++)
+    {
+      my $word = $words[$i];
+      $sentence .= " $word"; # add another word to the sentence
+      my $say = 0;
+      my $silence_duration = 0.0;
+      if (length($sentence) >= $SENTENCE_MAX_CHARACTERS) {
+        # Remove the last word;
+        $sentence = substr($sentence, 0, length($sentence)-length($word)-1);
+        $say = 1;
+        $silence_duration = $silence_duration_words;
+        if ($sentence =~ /^\s*$/) {
+          # The word alone exceeds the limit. Stepping back would retry the
+          # same word forever, so speak its head now and queue the rest.
+          # NOTE(review): the input is read as raw bytes, so this cut may
+          # fall inside a multi-byte character.
+          my $head_length = $SENTENCE_MAX_CHARACTERS - 2;
+          $sentence = " " . substr($word, 0, $head_length);
+          my $tail = length($word) > $head_length ? substr($word, $head_length) : "";
+          if (length($tail) > 0) {
+            $words[$i] = $tail;
+            $i --; # process the remainder next
+          }
+        } else {
+          $i --; # one word back
+        }
+      }
+      # If a separator was found
+      elsif (substr($word, length($word)-1, 1) =~ /[.!?]/ ) {
+        $say = 1;
+        $silence_duration = $silence_duration_sentences;
+      }
+      elsif (substr($word, length($word)-1, 1) eq ",") {
+        $say = 1;
+        $silence_duration = $silence_duration_comma;
+      }
+      elsif (substr($word, length($word)-1, 1) eq ";") {
+        $say = 1;
+        $silence_duration = $silence_duration_semicolon;
+      }
+      elsif (substr($word, length($word)-1, 1) eq ")") {
+        $say = 1;
+        $silence_duration = $silence_duration_brace;
+      }
+      # If there are no more words
+      elsif ($i == scalar(@words)-1) {
+        $say = 1;
+        $silence_duration = $silence_duration_words;
+      }
+
+      if ($say) {
+        print "sentence[$tts_requests_counter]: $sentence\n";
+        my $trimmed_mp3 = TrimSilence( SentenceToMp3($sentence, $sentence_idx++) );
+        $tts_requests_counter ++;
+        $sentence = ""; # start a new sentence
+        # An empty TTS response produces no file: there is nothing to
+        # measure or to join for this chunk.
+        next if $trimmed_mp3 eq "";
+
+        my $trimmed_mp3_sample_rate = `soxi -r $trimmed_mp3`;
+        chomp($trimmed_mp3_sample_rate);
+        if ($sample_rate == 0) {
+          $sample_rate = $trimmed_mp3_sample_rate;
+        }
+        if ($sample_rate != $trimmed_mp3_sample_rate) {
+          die("Error: sample rate of '$trimmed_mp3' differs from the sample rate of previous files.");
+        }
+        #print "trimmed_mp3_sample_rate: $trimmed_mp3_sample_rate\n";
+        push @all_mp3s, $trimmed_mp3;
+        push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration, $sample_rate);
+      }
+    }
+  }
+}
+close($in_fh);
+
+print "Concatenate: @all_mp3s\n";
+print "Writing output to $all_mp3_out...";
+JoinMp3s(\@all_mp3s, $all_mp3_out);
+print "done\n";
+rmtree( $TMP_DIR );
+
+# Concatenate mp3 files into a single file with sox.
+#   $mp3s_ref - array ref of input file paths (empty entries are ignored)
+#   $mp3_out  - path of the joined output file
+#   $depth    - recursion depth, only used to name intermediate files
+#               (optional, defaults to 0)
+# Returns nothing.
+sub JoinMp3s {
+  my $mp3s_ref = shift;
+  my $mp3_out = shift;
+  my $depth = shift || 0;
+
+  my @mp3s = grep { defined($_) && length($_) } @{$mp3s_ref};
+  if (!@mp3s) {
+    warn "WARNING: no audio to write to '$mp3_out'\n";
+    return;
+  }
+
+  #--------------------------------------------------
+  # Problem if the number of mp3s exceeds the max number of opened files per process
+  # The audio files should be concatenated by smaller chunks
+  #--------------------------------------------------
+  if (scalar(@mp3s) < $MAX_OPENED_FILES) {
+    # list form: the file names are passed to sox without going through a shell
+    system('sox', @mp3s, $mp3_out) == 0
+      or warn "WARNING: sox failed to write '$mp3_out' (status $?)\n";
+    return;
+  }
+
+  my @joined_chunks = ();
+  my $sub_idx = 0;
+  while (@mp3s) {
+    my @chunk = splice(@mp3s, 0, $MAX_OPENED_FILES-1);
+    my $sub_mp3_out = "$TMP_DIR/subjoin_".$depth."_$sub_idx.mp3"; $sub_idx++;
+    JoinMp3s(\@chunk, $sub_mp3_out, $depth+1);
+    push (@joined_chunks, $sub_mp3_out);
+  }
+  JoinMp3s(\@joined_chunks, $mp3_out, $depth+1);
+  return;
+}
+
+# Create an mp3 file containing silence.
+#   $idx         - number used in the file name (NNNN_sil.mp3)
+#   $duration    - length of the silence, passed to sox `trim`
+#   $sample_rate - sample rate, passed to sox `-r`
+# Returns the path of the generated file inside $TMP_DIR.
+sub SilenceToMp3 {
+  my $idx = shift;
+  my $duration = shift;
+  my $sample_rate = shift;
+
+  # name the file after the index the caller passed in, not after the
+  # global counter, which has already moved on by the time we get here
+  my $mp3_out = sprintf("%s/%04d_sil.mp3", $TMP_DIR, $idx);
+  Exec("sox -n -r $sample_rate $mp3_out trim 0.0 $duration");
+  return $mp3_out;
+}
+
+# Fetch the speech for one chunk of text and store it as an mp3 file.
+#   $sentence     - text to speak, at most $SENTENCE_MAX_CHARACTERS long
+#   $sentence_idx - number used in the file name (NNNN.mp3)
+# Returns the path of the written file inside $TMP_DIR, or "" when the
+# service returned no data. Dies when the text is too long or the file
+# cannot be written.
+sub SentenceToMp3 {
+  my $sentence     = shift;
+  my $sentence_idx = shift;
+
+  if (length($sentence) > $SENTENCE_MAX_CHARACTERS) {
+    die ("ERROR: sentence has more than $SENTENCE_MAX_CHARACTERS characters: '$sentence'");
+  }
+
+  # Build a valid query value: percent-encode everything that is not an
+  # unreserved character, then turn spaces into '+'. Without this, characters
+  # such as & # % + in the text corrupt the request URL.
+  # NOTE(review): assumes $sentence holds raw (undecoded) bytes, as read by
+  # the main loop - confirm if an input layer is ever added.
+  $sentence =~ s/([^A-Za-z0-9_.~ -])/sprintf("%%%02X", ord($1))/ge;
+  $sentence =~ s/ /+/g;
+
+  my $mp3_out = sprintf("%s/%04d.mp3", $TMP_DIR, $sentence_idx);
+
+  my $resp = GetSentenceResponse_CaptchaAware($sentence); # NOT WORKING YET
+
+  if (length($resp) == 0) {
+    print "EMPTY SENTENCE: '$sentence'\n";
+    return "";
+  }
+  # the response is binary audio data: write it without any translation
+  open(my $mp3_fh, '>:raw', $mp3_out) or die("ERROR: Can not write '$mp3_out': $!");
+  print {$mp3_fh} $resp;
+  close($mp3_fh) or die("ERROR: Can not finish writing '$mp3_out': $!");
+  return $mp3_out;
+}
+
+# Request the speech for $sentence with the plain LWP client and return the
+# response body. Dies when the body starts like an HTML page instead of
+# audio data.
+sub GetSentenceResponse() {
+  my $sentence = shift;
+  my $amptk = int(rand(1000000)) . '|' . int(rand(1000000));
+  my $url = "https://translate.google.com/translate_tts?ie=UTF-8&tl=$language&q=$sentence&total=1&idx=0&client=tw-ob&tk=$amptk";
+  my $resp = $browser->get($url);
+  my $body = $resp->content;
+
+  die("ERROR: expecting MP3 data, but got a HTML page!")
+    if $body =~ /^<!DOCTYPE/ || $body =~ /^<html>/;
+
+  return $body;
+}
+
+# Request the speech for $sentence and return the response body.
+# When the service answers with an HTML page instead of audio data, the
+# page text is printed and the request is repeated every
+# $RECAPTCHA_SLEEP_SECONDS seconds until audio data arrives.
+# The interactive captcha-solving attempt that used to follow the retry was
+# unreachable (it came after an unconditional `next`) and marked as not
+# working; it has been removed.
+sub GetSentenceResponse_CaptchaAware {
+  my $sentence = shift;
+
+  my $recaptcha_waiting = 0;
+  print "URL: https://translate.google.com/translate_tts?ie=UTF-8&tl=$language&q=$sentence&total=1&idx=0&client=tw-ob\n";
+  while (1) {
+    my $amptk = int(rand(1000000)) . '|' . int(rand(1000000));
+    my $url = "https://translate.google.com/translate_tts?ie=UTF-8&tl=$language&q=$sentence&total=1&idx=0&client=tw-ob&tk=$amptk";
+    $mech->get($url); $mech->add_header( Referer => "$referer" ); $referer = $url;
+    my $content = $mech->response()->content();
+
+    # anything that does not look like an HTML page is taken as the MP3 data
+    last if $content !~ /^<!DOCTYPE/ && $content !~ /^<html>/;
+
+    my $tree = HTML::TreeBuilder->new();
+    $tree->parse_content($content);
+    print "HTML response: ".$tree->as_text()."\n";
+    $tree->delete(); # free the parse tree explicitly
+
+    if (!$recaptcha_waiting) {
+      $recaptcha_waiting = 1;
+      print "We have to wait\n";
+    }
+    print ".";
+    sleep($RECAPTCHA_SLEEP_SECONDS);
+  }
+  if ($recaptcha_waiting) { print "\n"; }
+  return $mech->response()->content();
+}
+
+# Print a single progress dot on STDOUT and switch STDOUT to autoflush.
+sub PrintWaitingDot() {
+  select(STDOUT);   # make STDOUT the default handle for print and $|
+  print(".");
+  $| = 1;           # enabling autoflush also flushes the dot right away
+}
+
+# Run the given mp3 file through a sox pipeline (silence, reverse, silence,
+# reverse) and write the result next to it as <name>_trim.mp3.
+# Returns the path of the trimmed file, or "" when called with "".
+sub TrimSilence() {
+  my $mp3 = shift;
+
+  return "" if $mp3 eq "";
+
+  (my $mp3_out = $mp3) =~ s/\.mp3$/_trim.mp3/;
+  my $pipeline = "
+  sox $mp3 -p silence 1 0.1 -60d \\
+  | sox -p -p reverse \\
+  | sox -p -p silence 1 0.1 -60d \\
+  | sox -p $mp3_out reverse
+  ";
+  Exec($pipeline);
+  return $mp3_out;
+}
+
+# Run a shell command line.
+#   $cmd - command string, executed through the shell by system()
+# A failing command is reported on STDERR but is not fatal. Returns nothing.
+sub Exec {
+  my $cmd = shift;
+#  print "exec $cmd\n";
+  my $status = system($cmd);
+  if ($status != 0) {
+    warn "WARNING: command failed (status $status): $cmd\n";
+  }
+  return;
+}
diff --git a/lib/Speak.pm b/lib/Speak.pm

index c98feca..3a01ba1 100644 (file)
--- a/lib/Speak.pm
+++ b/lib/Speak.pm
@@ -126,6 +126,13 @@ Returns:
    # Log message to log file if $log was passed in.
    $log->msg($msg);
  
+  # Quote the message
+  #$msg = quotemeta $msg;
+
+  # Change some characters that mess up speech
+  $msg =~ s#\"#\\"#g;
+  $msg =~ s#\$#\\\$#g;
+
    my ($status, @output) = Execute "/usr/local/bin/gt \"$msg\"";
  
    if ($status) {
diff --git a/maps/bin/maps b/maps/bin/maps

index fc0a65f..594d2d9 100755 (executable)
--- a/maps/bin/maps
+++ b/maps/bin/maps
@@ -161,7 +161,7 @@ sub ProcessMsgs ($$$) {
      if ($msgInfo{sender} eq $user_email and
              (lc ($msgInfo{sender_long}) !~ lc ("\"$username\" <$user_email>") and
               lc ($msgInfo{sender_long}) !~ lc ("$username <$user_email>"))) {
-      $log->msg("Nulllisting message from sender ($msgInfo{sender_long}) pretending to be $user_email");
+      $log->msg("Nulllisting message from sender ($msgInfo{sender_long}) pretending to be $user_email - Subject: $msgInfo{subject}");
  
        next;
      } # if
@@ -169,7 +169,7 @@ sub ProcessMsgs ($$$) {
      # Discard messges coming from andrew@clearscm.com because I don't send from
      # that email address
      if (lc $msgInfo{to} eq 'andrew@clearscm.com') {
-      $log->msg("Nulllisting message from Andrew\@ClearSCM.com since I don't send from that email address");
+      $log->msg("Nulllisting message from Andrew\@ClearSCM.com since I don't send from that email address - Subject: $msgInfo{subject}");
  
        next;
      } # if
@@ -179,11 +179,11 @@ sub ProcessMsgs ($$$) {
  
      if ($onlist) {
        if (ValidDomainUser $msgInfo{sender}) {
-        $log->msg("Whitelisting $msgInfo{sender} - Rule: " . formatRule($rec));
+        $log->msg("Whitelisting $msgInfo{sender} - Rule: " . formatRule($rec) . " - Subject: $msgInfo{subject}");
  
          Whitelist $msgInfo{sender}, $msgInfo{data}, $rec->{sequence}, $rec->{hit_count};
        } else {
-        $log->msg("Sender ($msgInfo{sender}) from this domain but user not found");
+        $log->msg("Sender ($msgInfo{sender}) from this domain but user not found - Subject: $msgInfo{subject}");
  
          Nulllist $msgInfo{sender};
        } # if
@@ -195,7 +195,7 @@ sub ProcessMsgs ($$$) {
      ($onlist, $rec) = OnBlacklist $msgInfo{sender};
  
      if ($onlist) {
-      $log->msg("Blacklisting $msgInfo{sender} - Rule: " . formatRule($rec));
+      $log->msg("Blacklisting $msgInfo{sender} - Rule: " . formatRule($rec) . " - Subject: $msgInfo{subject}");
  
        Blacklist(
          userid    => $userid,
@@ -212,7 +212,7 @@ sub ProcessMsgs ($$$) {
      ($onlist, $rec) = OnNulllist $msgInfo{sender};
  
      if ($onlist) {
-      $log->msg("Nulllisting $msgInfo{sender} - Rule: " . formatRule($rec));
+      $log->msg("Nulllisting $msgInfo{sender} - Rule: " . formatRule($rec) . " - Subject: $msgInfo{subject}");
  
        Nulllist $msgInfo{sender}, $rec->{sequence}, $rec->{hit_count};
  
@@ -220,7 +220,7 @@ sub ProcessMsgs ($$$) {
      } # if
  
      # Return processing:
-    $log->msg("Returning message from $msgInfo{sender}");
+    $log->msg("Returning message from $msgInfo{sender} - Subject: $msgInfo{subject}");
  
      ReturnMsg(
        userid   => $userid,
diff --git a/rc/gitignore b/rc/gitignore

index ffc8f7e..f00cab3 100644 (file)
--- a/rc/gitignore
+++ b/rc/gitignore
@@ -2,4 +2,10 @@
  .pydevproject
  .includepath
  .perldb.hist
-/.vscode/
+
+# For some reason something keeps deleting my .gitignore file especially
+# in projects like /opt/songbook so I'm gonna relocate them here
+/opt/songbook/Tonight Only.lst
+/opt/songbook/.pydevproject
+/opt/songbook/.vscode
+/opt/songbook/.project
diff --git a/web/Resumes/Andrew/index.php b/web/Resumes/Andrew/index.php

index ded9b59..9372526 100644 (file)
--- a/web/Resumes/Andrew/index.php
+++ b/web/Resumes/Andrew/index.php
@@ -55,18 +55,18 @@ function stoptimer () {
  
    $count++;
  
-  fclose($resumeHit);
+  //fclose($resumeHit);
  
    $resumeHit = fopen ('.resumehits', 'w');
  
-  fwrite($resumeHit, $count);
-  fclose($resumeHit);
+  //fwrite($resumeHit, $count);
+  //fclose($resumeHit);
  
-  $resumeHist = fopen('.resume.hist', 'a');
+  //$resumeHist = fopen('.resume.hist', 'a');
    $date = date(DATE_RFC822);
  
-  fwrite($resumeHist, "$_SERVER[REMOTE_ADDR] read resume at $date\n");
-  fclose($resumeHist);
+  //fwrite($resumeHist, "$_SERVER[REMOTE_ADDR] read resume at $date\n");
+  //fclose($resumeHist);
  
    $msg  = '<html><body>';
    $msg .= '<h1>Somebody just visited your resume.</h1>';
author	Andrew DeFaria <A.DeFaria@cpanel.net>
	Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)
committer	Andrew DeFaria <A.DeFaria@cpanel.net>
	Mon, 25 Jul 2022 15:30:19 +0000 (08:30 -0700)
bin/simple_google_tts	[new file with mode: 0755]	patch \| blob
bin/speak		patch \| blob \| history
bin/speak.pl	[new file with mode: 0755]	patch \| blob
lib/Speak.pm		patch \| blob \| history
maps/bin/maps		patch \| blob \| history
rc/gitignore		patch \| blob \| history
web/Resumes/Andrew/index.php		patch \| blob \| history