Paolo
Paolo

Reputation: 2249

Parsing files to copy the ones that match my criteria

I have files that are to be copied and removed on different days and at different times, and the criteria are part of each file's name. I was thinking of using Bash and regex to combine various variables in regular expressions and simply use mv. But perhaps a loop of some kind, where I parse the file names, is a better idea.

Say I have a file called: *TuesdayThursdayMonday_1800-1900.txt*

Now let's say $dayofweek is Monday.

I want the criteria to be:

*$dayofweek* must appear before the underscore (_). The current time must be later than the value to the left of the dash (1800) AND earlier than the value to the right of the dash (1900).

If all this is true, do mv on the file.

Upvotes: 0

Views: 84

Answers (1)

ooga
ooga

Reputation: 15501

# Function checkfilename:
#   Usage: checkfilename filename dayofweek [time]
#     filename format: dayname..._timestart-timeend.extension
#     (Underscores can optionally appear between the daynames.)
#   Checks if filename contains dayofweek before the (last) underscore
#   and that time is within the time range after the (last) underscore.
#   Both ends of the range are inclusive. A range whose end is smaller
#   than its start (e.g. 2300-0018) is treated as wrapping past midnight.
#   If time is not given, the current time (date +%H%M) is used.
#   Returns:
#     0 if both the day and the time match.
#     1 otherwise, including when dayofweek is empty or when a time field
#       (argument or filename) is not made up of digits only.
#   Code notes:
#     ${var#patt} Removes patt from beginning of $var.
#     ${var%patt} Removes patt from end of $var.
#     10#num interprets num as decimal even if it begins with a 0.
#     dayofweek is compared literally; glob characters in it are not special.

checkfilename() {
  local file day days now times
  local -i time tstart tend

  file="$1"  # filename
  day="$2"   # day of week

  # Check if the first part of the filename contains day.
  # "$day" is quoted inside the pattern so it is matched as a literal string.
  days=${file%_*} # everything before the last underscore
  [[ -n "$day" && "$days" == *"$day"* ]] || return 1

  # Get time from 3rd parameter or from date command (HHMM format).
  if (($# >= 3)); then now="$3"
  else now=$(date +%H%M); fi
  # Only digits may reach the arithmetic evaluation below.
  [[ "$now" =~ ^[0-9]+$ ]] || return 1
  time=10#$now

  # Isolate "start-end": drop through the last underscore, then the extension.
  times=${file##*_}; times=${times%.*}
  [[ "$times" =~ ^([0-9]+)-([0-9]+)$ ]] || return 1
  tstart=10#${BASH_REMATCH[1]}
  tend=10#${BASH_REMATCH[2]}

  # If second time is less than first time, the range wraps: add 2400.
  ((tend < tstart)) && ((tend += 2400))
  # If current time is less than first time, shift it into the next day
  # so it can be compared against a wrapped range.
  ((time < tstart)) && ((time += 2400))

  # The status of this test is the function's return status.
  ((tstart <= time && time <= tend))
}

# Demo: 0005 lies inside the 2300-0018 window that wraps past midnight,
# and "Thursday" appears in the day list.
file='TuesdayThursdayMonday_2300-0018.txt'
dayofweek='Thursday'
checkfilename "${file}" "${dayofweek}" 0005 && printf '%s\n' yep

If the filename has a prefix to extract as well, it can be done like this:

# Example: split "prefix_days_times.ext" into its parts using only
# parameter expansion (no external commands).
file="1A_Monday_1800-1900.mp4"

ext=${file##*.}           # remove from front longest  string matching *.
file=${file%.*}           # remove from back  shortest string matching .*
prefix=${file%%_*}        # remove from back  longest  string matching _*
days=${file#*_}           # remove from front shortest string matching *_
days=${days%%_*}          # remove from back  longest  string matching _*
times=${file##*_}         # remove from front longest  string matching *_

# Print one value per line. The expansions are quoted so that a value
# containing spaces or glob characters is printed unchanged.
printf '%s\n' "$file" "$ext" "$prefix" "$days" "$times"

Note that in the match patterns, '*' matches any number of any character. '.' matches an actual period and '_' matches an actual underscore. Others are '?', matching any single character, [abcd] matching any one of the contained characters, and [^abcd] (or [!abcd]), matching any character except one of the contained characters.

${var#patt} expands to $var with shortest patt match removed from front.
${var##patt} expands to $var with longest patt match removed from front.
${var%patt} expands to $var with shortest patt match removed from end.
${var%%patt} expands to $var with longest patt match removed from end.

A totally different method uses the IFS (input field separator) shell variable instead of the parameter expansions: the filename is split on underscores and periods, and the resulting fields are stored in an array.

#!/bin/bash

# Function checkfilename:
#   Usage: checkfilename filename dayofweek [time]
#     filename format: prefix_dayname..._timestart-timeend.extension
#     The filename is split on '_' and '.'; the second field is taken as
#     the day list and the third field as the time range, so the day list
#     itself must not contain underscores or periods.
#   Checks if filename contains dayofweek between the underscores
#   and that time is within the time range after the second underscore.
#   Both ends of the range are inclusive. A range whose end is smaller
#   than its start (e.g. 2300-0018) is treated as wrapping past midnight.
#   If time is not given, the current time (date +%H%M) is used.
#   Returns:
#     0 if both the day and the time match.
#     1 otherwise, including when dayofweek is empty or when a time field
#       (argument or filename) is not made up of digits only.
#   Code notes:
#     10#num interprets num as decimal even if it begins with a 0.
#     'read -a' splits into an array without glob-expanding the fields.
#     dayofweek is compared literally; glob characters in it are not special.
checkfilename() {
  local file="$1"  # filename
  local day="$2"   # day of week
  local -a a
  local days times now
  local -i time tstart tend

  # Split fields on underscore and period. IFS is changed for this one
  # command only, and the fields are never subject to pathname expansion.
  IFS='_.' read -r -a a <<< "$file"
  days="${a[1]-}"    # second field: day names
  times="${a[2]-}"   # third field: start-end

  # If days doesn't contain day, return failure.
  [[ -n "$day" && "$days" == *"$day"* ]] || return 1

  # Get time from 3rd parameter or from date command (HHMM 24-hr format).
  if (($# >= 3)); then now="$3"
  else now=$(date +%H%M); fi
  # Only digits may reach the arithmetic evaluation below.
  [[ "$now" =~ ^[0-9]+$ ]] || return 1
  time=10#$now

  [[ "$times" =~ ^([0-9]+)-([0-9]+)$ ]] || return 1
  tstart=10#${BASH_REMATCH[1]}
  tend=10#${BASH_REMATCH[2]}

  # A range ending before it starts wraps past midnight; a time earlier
  # than the start is shifted into the next day for the comparison.
  ((tend < tstart)) && ((tend += 2400))
  ((time < tstart)) && ((time += 2400))

  # The status of this test is the function's return status.
  ((tstart <= time && time <= tend))
}

# Demo calls: each line prints its label when checkfilename gives the
# expected answer for the 2300-0018 window.
file='1A_TuesdayThursdayMonday_2300-0018.txt'
dayofweek='Thursday'
checkfilename "${file}" "${dayofweek}" 0005 && printf '%s\n' pass1
checkfilename "${file}" "${dayofweek}" 0025 || printf '%s\n' pass2
dayofweek='Saturday'
checkfilename "${file}" "${dayofweek}" 0005 || printf '%s\n' pass3

Upvotes: 3

Related Questions