Reputation: 647
I'm trying to write some regex for inserting box drawing characters based on conditionals, but I keep getting the compilation error subpattern name expected.
Here is my code:
/*
 * Scans pkginfo with a multiline PCRE2 pattern and copies every matched span,
 * followed by '\n', into a 4096-byte buffer that is allocated here and stored
 * in *pkgdetail.
 *
 * Returns 1 (after printing the PCRE2 error message) when either pattern fails
 * to compile, and 0 otherwise.
 *
 * NOTE(review): the buffer stored in *pkgdetail is never freed in this
 * function; presumably the caller owns it, including on the return-1 paths --
 * confirm against the caller.
 */
int match_pkg_details(char **pkgdetail, char *pkginfo)
{
// Negative lookahead: a line matches only when it does not start with one of
// the listed labels and does not consist solely of spaces.
PCRE2_SPTR pattern = (PCRE2_SPTR)"^(?!Name|Architecture|URL|Licenses|"\
"Installed Size|Packager|Build Date|"\
"Install Date|Install Script|Validated By| *$).*$";
// NOTE(review): the malloc result is not checked and the memory is left
// uninitialized.
*pkgdetail = malloc(4096); // FIXME malloc in initializer
// worker is the write cursor into *pkgdetail.
char *worker = *pkgdetail;
size_t pattern_length = strlen((char *)pattern);
int errornumber;
PCRE2_SIZE erroroffset;
pcre2_code *regex = pcre2_compile(
pattern,
pattern_length,
PCRE2_MULTILINE,
&errornumber,
&erroroffset,
NULL);
// Compilation failed: print the PCRE2 message with its offset and bail out.
if (regex == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
// Second pattern, compiled below with PCRE2_EXTENDED and later handed to
// pcre2_substitute as the pattern to match.
// NOTE(review): presumably this is the pattern that produces the reported
// "subpattern name expected" compile error -- confirm by checking erroroffset.
PCRE2_SPTR replacement = (PCRE2_SPTR)"(?(?=^Install Reason) a | ((?=(\\w) b | ((?=(\\s) c )))))";
// if starts with Install Reason replace with bottom line arrow }}}
size_t replacement_length = strlen((char*)replacement);
pcre2_code *replacement_regex = pcre2_compile(
replacement,
replacement_length,
PCRE2_EXTENDED,
&errornumber,
&erroroffset,
NULL);
// NOTE(review): this early return does not free regex, which was compiled
// successfully above.
if (replacement_regex == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
pcre2_match_data *match_data =
pcre2_match_data_create_from_pattern(regex, NULL);
PCRE2_SPTR subject = (PCRE2_SPTR)pkginfo;
size_t length = strlen((char *)subject);
// ovector aliases the offset vector inside match_data; slot 1 (end of the
// whole match) is zeroed before the first match attempt.
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
ovector[1] = 0;
int rc;
PCRE2_SIZE offset = 0;
// These options apply to the first pcre2_match call only; they are reset to 0
// inside the loop.
uint32_t options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
// NOTE(review): length is a size_t, so length - 1 wraps around when the
// subject is empty. The loop continues while pcre2_match returns any non-zero
// value, which includes negative error codes other than PCRE2_ERROR_NOMATCH.
while (offset < length - 1 && (rc =
pcre2_match(regex, subject, length, offset, options, match_data, NULL)))
{
// Next search starts at the end offset recorded in ovector[1].
offset = ovector[1];
options = 0;
// No match: bump the stored end offset by one so a later iteration moves on.
if (rc == PCRE2_ERROR_NOMATCH)
{
ovector[1] = offset + 1;
continue;
}
// One pass per offset pair reported by pcre2_match (rc pairs).
for (int i = 0; i < rc; i++)
{
// NOTE(review): worker_len is never used, and on the first pass strlen reads
// malloc'd memory that has not been written yet.
PCRE2_SIZE worker_len = strlen(worker);
PCRE2_UCHAR output[4096];
// NOTE(review): outlen is passed uninitialized; the PCRE2 documentation
// describes it as in/out (buffer size in, result length out) -- confirm.
// Neither rs nor output is read after this call.
PCRE2_SIZE outlen;
int rs = pcre2_substitute(
replacement_regex,
subject,
length,
offset,
PCRE2_SUBSTITUTE_EXTENDED,
NULL,
NULL,
(PCRE2_SPTR)"@",
1,
output,
&outlen);
// Copy the i-th matched span plus a newline to the cursor, then advance the
// cursor past the span and the newline.
// NOTE(review): the 4096 bound passed to snprintf is not reduced as worker
// advances through the 4096-byte buffer.
PCRE2_SPTR substring_start = subject + ovector[2*i];
size_t substring_length = ovector[2*i+1] - ovector[2*i];
snprintf(worker, 4096, "%.*s\n", (int)substring_length, (char*)substring_start);
worker += (int)substring_length + 1;
}
}
// NOTE(review): replacement_regex is not released here; only match_data and
// regex are freed.
pcre2_match_data_free(match_data);
pcre2_code_free(regex);
return 0;
}
The string I'm matching against:
Name : cinnamon
Version : 3.4.6-1
Description : Linux desktop which provides advanced innovative features and
a traditional user experience
Architecture : x86_64
URL : https://github.com/linuxmint/Cinnamon
Licenses : GPL2
Groups : None
Provides : None
Depends On : accountsservice caribou cinnamon-settings-daemon
cinnamon-session cinnamon-translations cjs clutter-gtk
gnome-backgrounds gnome-themes-standard gstreamer
libgnome-keyring libkeybinder3 librsvg muffin
python2-cairo python-dbus python2-dbus python2-pillow
python2-pam python2-pexpect python2-pyinotify python2-lxml
cinnamon-control-center cinnamon-screensaver cinnamon-menus
libgnomekbd network-manager-applet nemo polkit-gnome xapps
python2-gobject
Optional Deps : blueberry: Bluetooth support [installed]
gnome-panel: fallback mode
metacity: fallback mode
system-config-printer: printer settings [installed]
Required By : cinnamon-sound-effects
Optional For : None
Conflicts With : None
Replaces : None
Installed Size : 8.31 MiB
Packager : Antonio Rojas <[email protected]>
Build Date : Sat 09 Sep 2017 05:38:21 AM CDT
Install Date : Sat 09 Sep 2017 11:37:44 AM CDT
Install Reason : Installed as a dependency for another package
Install Script : No
Validated By : Signature
Currently, if I remove the replacement groups I get:
Version : 3.4.6-1
Description : Linux desktop which provides advanced innovative features
and a traditional user experience
Provides : None
Depends On : accountsservice caribou cinnamon-settings-daemon
cinnamon-session cinnamon-translations cjs clutter-gtk gnome-backgrounds
gnome-themes-standard gstreamer libgnome-keyring libkeybinder3 librsvg
muffin python2-cairo python-dbus python2-dbus python2-pillow python2-pam
python2-pexpect python2-pyinotify python2-lxml cinnamon-control-center
cinnamon-screensaver cinnamon-menus libgnomekbd network-manager-applet
nemo polkit-gnome xapps python2-gobject
Optional Deps : blueberry: Bluetooth support [installed]
Required By : cinnamon-sound-effects
Optional For : None
Conflicts With : None
Replaces : None
Install Reason : Installed as a dependency for another package
The intended output looks like:
├─ Version : 3.4.6-1
├─ Description : Linux desktop which provides advanced innovative features
│ and a traditional user experience
├─ Provides : None
├─ Depends On : accountsservice caribou cinnamon-settings-daemon
│ cinnamon-session cinnamon-translations cjs clutter-gtk gnome-backgrounds
│ gnome-themes-standard gstreamer libgnome-keyring libkeybinder3 librsvg
│ muffin python2-cairo python-dbus python2-dbus python2-pillow python2-pam
│ python2-pexpect python2-pyinotify python2-lxml cinnamon-control-center
│ cinnamon-screensaver cinnamon-menus libgnomekbd network-manager-applet
│ nemo polkit-gnome xapps python2-gobject
├─ Optional Deps : blueberry: Bluetooth support [installed]
├─ Required By : cinnamon-sound-effects
├─ Optional For : None
├─ Conflicts With : None
├─ Replaces : None
└─ Install Reason : Installed as a dependency for another package
a, b, and c are just there for testing purposes (I think I should replace them with named capture groups). I'll be breaking the regex_compile sections out into their own function once I get the replacement working correctly. How can I replace named groups with pcre2_substitute?
Upvotes: 2
Views: 2008
Reputation: 51330
You're trying to do your logic in the wrong place. You need to handle it in the substitution pattern, not in the regex pattern itself.
First, let's write a pattern which will identify the different parts of your string:
^(?:
(?<remove>(?:
Name|Architecture|URL|Licenses|
Installed[ ]Size|Packager|Build[ ]Date|
Install[ ]Date|Install[ ]Script|Validated[ ]By
)\s*:[^\n]*\n)
|(?<last>(?=Install[ ]Reason\s*:))
|(?<field>(?=\S))
|(?<cont>(?=\s))
)
That's with the mx options (PCRE2_MULTILINE | PCRE2_EXTENDED), but we won't really need PCRE2_EXTENDED in the C code.
This will identify some parts of the string and fill exactly one named capture group in the result:
- remove: for parts to remove
- last: for that last field
- field: for other fields
- cont: for value continuations (a line without a field label)

Next, we'll have to replace each of these parts with a different string:
- remove => (empty string)
- last => └─ (I'll be using \- instead in the program below)
- field => ├─ (I'll be using +- instead in the program below)
- cont => │ (I'll be using | instead in the program below)

We can let PCRE handle that through PCRE2_SUBSTITUTE_EXTENDED (docs):
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more flexibility to group substitution. The syntax is similar to that used by Bash:

  ${<n>:-<string>}
  ${<n>:+<string1>:<string2>}

As before, <n> may be a group number or a name. The first form specifies a default value. If group <n> is set, its value is inserted; if not, <string> is expanded and the result inserted. The second form specifies strings that are expanded and inserted when group <n> is set or unset, respectively. The first form is just a convenient shorthand for

  ${<n>:+${<n>}:<string>}
So, using that syntax, our replacement string looks like this:
${remove:+:${last:+\\- :${field:++- :${cont:+| :}}}}
Here's a full demo:
#include <stdio.h>
#include <stdlib.h>
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
PCRE2_SPTR input =
"Name : cinnamon\n"
"Version : 3.4.6-1\n"
"Description : Linux desktop which provides advanced innovative features and\n"
" a traditional user experience\n"
"Architecture : x86_64\n"
"URL : https://github.com/linuxmint/Cinnamon\n"
"Licenses : GPL2\n"
"Groups : None\n"
"Provides : None\n"
"Depends On : accountsservice caribou cinnamon-settings-daemon\n"
" cinnamon-session cinnamon-translations cjs clutter-gtk\n"
" gnome-backgrounds gnome-themes-standard gstreamer \n"
" libgnome-keyring libkeybinder3 librsvg muffin \n"
" python2-cairo python-dbus python2-dbus python2-pillow\n"
" python2-pam python2-pexpect python2-pyinotify python2-lxml\n"
" cinnamon-control-center cinnamon-screensaver cinnamon-menus\n"
" libgnomekbd network-manager-applet nemo polkit-gnome xapps\n"
" python2-gobject\n"
"Optional Deps : blueberry: Bluetooth support [installed]\n"
" gnome-panel: fallback mode\n"
" metacity: fallback mode\n"
" system-config-printer: printer settings [installed]\n"
"Required By : cinnamon-sound-effects\n"
"Optional For : None\n"
"Conflicts With : None\n"
"Replaces : None\n"
"Installed Size : 8.31 MiB\n"
"Packager : Antonio Rojas <[email protected]>\n"
"Build Date : Sat 09 Sep 2017 05:38:21 AM CDT\n"
"Install Date : Sat 09 Sep 2017 11:37:44 AM CDT\n"
"Install Reason : Installed as a dependency for another package\n"
"Install Script : No\n"
"Validated By : Signature\n";
PCRE2_SPTR pattern =
"^(?:"
"(?<remove>(?:"
"Name|Architecture|URL|Licenses|"
"Installed Size|Packager|Build Date|"
"Install Date|Install Script|Validated By"
")\\s*:[^\n]*\n)"
"|(?<last>(?=Install Reason\\s*:))"
"|(?<field>(?=\\S))"
"|(?<cont>(?=\\s))"
")";
PCRE2_SPTR replacement =
"${remove:+:${last:+\\\\- :${field:++- :${cont:+| :}}}}";
static void print_error(int code)
{
PCRE2_UCHAR message[256];
if (pcre2_get_error_message(code, &message, sizeof(message) / sizeof(PCRE2_UCHAR)))
puts(message);
}
int main()
{
pcre2_code *re;
pcre2_match_context *match_context;
int result, error;
PCRE2_SIZE erroffset, outlength;
PCRE2_UCHAR* outbuf;
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, PCRE2_MULTILINE, &error, &erroffset, 0);
if (!re)
{
print_error(error);
return 1;
}
match_context = pcre2_match_context_create(0);
outlength = 0;
result = pcre2_substitute(
re,
input,
PCRE2_ZERO_TERMINATED,
0,
PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED,
0,
match_context,
replacement,
PCRE2_ZERO_TERMINATED,
0,
&outlength
);
if (result != PCRE2_ERROR_NOMEMORY)
{
print_error(result);
return ;
}
outbuf = malloc(outlength * sizeof(PCRE2_UCHAR));
result = pcre2_substitute(
re,
input,
PCRE2_ZERO_TERMINATED,
0,
PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED,
0,
match_context,
replacement,
PCRE2_ZERO_TERMINATED,
outbuf,
&outlength
);
if (result < 0)
{
print_error(result);
return;
}
puts(outbuf);
free(outbuf);
pcre2_match_context_free(match_context);
pcre2_code_free(re);
return 0;
}
The output is:
+- Version : 3.4.6-1
+- Description : Linux desktop which provides advanced innovative features and
| a traditional user experience
+- Groups : None
+- Provides : None
+- Depends On : accountsservice caribou cinnamon-settings-daemon
| cinnamon-session cinnamon-translations cjs clutter-gtk
| gnome-backgrounds gnome-themes-standard gstreamer
| libgnome-keyring libkeybinder3 librsvg muffin
| python2-cairo python-dbus python2-dbus python2-pillow
| python2-pam python2-pexpect python2-pyinotify python2-lxml
| cinnamon-control-center cinnamon-screensaver cinnamon-menus
| libgnomekbd network-manager-applet nemo polkit-gnome xapps
| python2-gobject
+- Optional Deps : blueberry: Bluetooth support [installed]
| gnome-panel: fallback mode
| metacity: fallback mode
| system-config-printer: printer settings [installed]
+- Required By : cinnamon-sound-effects
+- Optional For : None
+- Conflicts With : None
+- Replaces : None
\- Install Reason : Installed as a dependency for another package
I think I should mention that in your case it would certainly be easier to just do the string manipulation by hand rather than going through a regex pattern.
Upvotes: 4