Author: Alexander Zangerl <az@debian.org>
Subject: #487980 add support for urlgroups (squid 2.6)

--- a/etc/redirect.rules
+++ b/etc/redirect.rules
@@ -4,11 +4,13 @@
 
 # Syntax: 
 # 
-#	regex|regexi pattern replacement
+#	regex[i] pattern replacement
+#	regex[i]ug urlgroup pattern replacement
 #
 # or
 #
 #	abort .filename_extension
+#	abortug urlgroup .filename_extension
 
 # jesred uses a linear list of redirect rules and terminates on first match, 
 # so the order of rules is important!
@@ -31,12 +33,23 @@
 
 # regex RE [RURL]
 # regexi RE [RURL]
+# regexug URLGROUP RE [RURL]
+# regexiug URLGROUP RE [RURL]
 #
 # regex  ... indicates, that the following RE is case-sensitive
 # regexi ... indicates, that the following RE is case-insensitive
 # RE     ... is the regular expression, which has to match the passed URL to get
 #            rewritten with the following RURL (see regex(7)).
 # RURL   ... if RE matches the passed URL, jesred returns RURL
+# URLGROUP   ... URLGROUP must also match for jesred to return RURL
+
+# To indicate that a client-side redirect to the new URL should be
+# performed, prefix the RURL with "301:" (moved permanently)
+# or "302:" (moved temporarily).
+
+# RURL can also include a "urlgroup" that can subsequently be matched
+# in cache_peer_access and similar ACL driven rules. A urlgroup is
+# returned by prefixing the RURL with "!urlgroup!" (squid 2.6STABLE19).
 
 # If RURL is omitted, all URLs which match RE are NOT rewritten.
 # So the following two rules prevent jesred from rewriting matched URLs, but
--- a/main.c
+++ b/main.c
@@ -48,7 +48,7 @@
 #include "version.h"
 #include "rewrite.h"
 
-int pattern_compare(char *,char *, pattern_item *);	/* from rewrite.c */
+int pattern_compare(char *,char *, char *, pattern_item *);	/* from rewrite.c */
 
 static void Usage (void);
 static void GetOptions(int argc, char *argv[]);
@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 /*    int first_run = 1; */
     char buff[BUFSIZE];
     char redirect_url[BUFSIZE];
-    char *url, *src_addr, *ident, *method;
+    char *url, *src_addr, *ident, *method, *urlgroup;
     int finished = 0;
     int buff_status = 0;
     ip_acl *ip_list = NULL;
@@ -118,7 +118,7 @@ int main(int argc, char **argv)
 	    }
 	    /* separate the four fields from the single input line of stdin */
 	    buff_status = parse_buff(buff, &url, &src_addr, &ident, &method,
-				     ip_list, pattern_list);
+				     &urlgroup, ip_list, pattern_list);
 	    /* error during parsing the passed line from squid - no rewrite */
 	    if(buff_status) {
 		puts("");
@@ -133,7 +133,7 @@ int main(int argc, char **argv)
 		continue;
 	    }
 	    /* find a rule for rewriting the URL */
-	    val = pattern_compare(url, redirect_url, pattern_list);
+	    val = pattern_compare(url, urlgroup, redirect_url, pattern_list);
 	    if( val < 1 ) {
 		/* no rule found = 0, or ABORT rule -N */
 		puts("");
--- a/pattern_list.c
+++ b/pattern_list.c
@@ -1,5 +1,5 @@
 /*
- * $Id: pattern_list.c,v 1.2 1998/07/25 02:32:45 elkner Exp $
+ * $Id: pattern_list.c,v 1.7 2008/04/21 01:51:17 nrickerby Exp nrickerby $
  *
  * Author:  Squirm derived      http://www.senet.com.au/squirm/
  * Project: Jesred       http://ivs.cs.uni-magdeburg.de/~elkner/webtools/jesred/
@@ -53,17 +53,23 @@ char * get_accel(char *, int *, int);
 void
 add_to_patterns(char *pattern, pattern_item **plist)
 {
-    char first[BUFSIZE];
-    char second[BUFSIZE];
-    char type[BUFSIZE];
+    char * rgxurl = "";
+    char * repurl = "";
 #ifdef USE_ACCEL
-    char accel[BUFSIZE];
+    char * accel = "";
 #endif
+    char * urlgroup = NULL;
     regex_t compiled;
     pattern_item rpattern;
     int abort_type = 0;
     int stored;
     
+    char type[BUFSIZE];
+    char arg1[BUFSIZE];
+    char arg2[BUFSIZE];
+    char arg3[BUFSIZE];
+    char arg4[BUFSIZE];
+
     /*  The regex_flags that we use are:
 	REG_EXTENDED 
 	REG_NOSUB 
@@ -73,11 +79,9 @@ add_to_patterns(char *pattern, pattern_i
     
     rpattern.type = EXTENDED;
     rpattern.case_sensitive = 1;
-#ifdef USE_ACCEL   
-    stored = sscanf(pattern, "%s %s %s %s", type, first, second, accel);
-#else
-    stored = sscanf(pattern, "%s %s %s", type, first, second);
-#endif
+
+    stored = sscanf(pattern, "%s %s %s %s %s", type, arg1, arg2, arg3, arg4);
+
     if((stored < 2) || (stored > 4)) {
 	mylog(ERROR, "unable to get a pair of patterns in add_to_patterns() "
 	    "for [%s]\n", pattern);
@@ -85,39 +89,100 @@ add_to_patterns(char *pattern, pattern_i
 	return;
     }
   
-    if(stored == 2)
-	strcpy(second, "");
     
-    if(strcmp(type, "abort") == 0) {
+    if ( (strcmp(type, "regexug") == 0) || (strcmp(type, "regexiug") == 0) ) {
+        switch( stored )
+        {
+            case 3 : urlgroup = arg1;
+                     rgxurl = arg2;
+                     break;
+            case 4 : urlgroup = arg1;
+                     rgxurl = arg2;
+                     repurl = arg3;
+                     break;
+#ifdef USE_ACCEL   
+            case 5 : urlgroup = arg1;
+                     rgxurl = arg2;
+                     repurl = arg3;
+                     accel = arg4;
+                     break;
+#endif
+            default  : mylog(ERROR, "unable to parse rule for [%s]\n", pattern);
+                       echo_mode = 1;
+                       break;
+        }		
+    }
+    else if ( (strcmp(type, "regex") == 0) || (strcmp(type, "regexi") == 0) ) {
+        switch( stored )
+        {
+            case 2 : rgxurl = arg1;
+                     repurl = "";  /* no RURL given; repoint, never strcpy() into a literal */
+                     break;
+            case 3 : rgxurl = arg1;
+                     repurl = arg2;
+                     break;
+#ifdef USE_ACCEL   
+            case 4 : rgxurl = arg1;
+                     repurl = arg2;
+                     accel = arg3;
+                     break;
+#endif
+            default  : mylog(ERROR, "unable to parse rule for [%s]\n", pattern);
+                       echo_mode = 1;
+                     break;
+        }		
+    }
+    else if (strcmp(type, "abortug") == 0) {
 	rpattern.type = ABORT;
 	abort_type = 1;
+        if ( stored == 3 ) {
+            urlgroup = arg1;
+            rgxurl = arg2;
+        } else {
+            mylog(ERROR, "unable to parse rule for [%s]\n", pattern);
+            echo_mode = 1;
+        }
+    }
+    else if (strcmp(type, "abort") == 0) {
+	rpattern.type = ABORT;
+	abort_type = 1;
+        if ( stored == 2 ) {
+            rgxurl = arg1;
+        } else {
+            mylog(ERROR, "unable to parse rule for [%s]\n", pattern);
+            echo_mode = 1;
+        }
+    }
+    else {
+        mylog(ERROR, "unable to parse rule for [%s]\n", pattern);
+	echo_mode = 1;
     }
   
-    if(strcmp(type, "regexi") == 0) {
+    if((strcmp(type, "regexi") == 0) || (strcmp(type, "regexiug") == 0)) {
 	regex_flags |= REG_ICASE;
 	rpattern.case_sensitive = 0;
     }
     
-    if(regcomp(&compiled, first, regex_flags)) {
-	mylog(ERROR, "Invalid regex [%s] in pattern file\n", first);
+    if(regcomp(&compiled, rgxurl, regex_flags)) {
+	mylog(ERROR, "Invalid regex [%s] in pattern file\n", rgxurl);
 	echo_mode = 1;
 	return;
     }
     rpattern.cpattern = compiled;
-    rpattern.pattern = (char *)malloc(sizeof(char) * (strlen(first) +1));
+    rpattern.pattern = (char *)malloc(sizeof(char) * (strlen(rgxurl) +1));
     if(rpattern.pattern == NULL) {
 	mylog(ERROR, "unable to allocate memory in add_to_patterns()\n");
 	echo_mode = 1;
 	return;
     }
-    strcpy(rpattern.pattern, first);
-    rpattern.replacement = (char *)malloc(sizeof(char) * (strlen(second) +1));
+    strcpy(rpattern.pattern, rgxurl);
+    rpattern.replacement = (char *)malloc(sizeof(char) * (strlen(repurl) +1));
     if(rpattern.replacement == NULL) {
 	mylog(ERROR, "unable to allocate memory in add_to_patterns()\n");
 	echo_mode = 1;
 	return;
     }
-    strcpy(rpattern.replacement, second);
+    strcpy(rpattern.replacement, repurl);
 
 #ifdef USE_ACCEL
     /* use accelerator string if it exists */
@@ -136,6 +201,20 @@ add_to_patterns(char *pattern, pattern_i
 	rpattern.accel = NULL;
     }
 #endif
+
+    if ( urlgroup ) {
+        rpattern.urlgroup = (char *)malloc(sizeof(char) * (strlen(urlgroup) +1));
+        if(rpattern.urlgroup == NULL) {
+            mylog(ERROR, "unable to allocate memory in add_to_patterns()\n");
+            echo_mode = 1;
+            return;
+        }
+        strcpy(rpattern.urlgroup, urlgroup);
+        }
+    else {
+        rpattern.urlgroup = NULL;
+    }
+
     add_to_plist(rpattern, plist);
 }
 
@@ -233,6 +312,7 @@ add_to_plist(pattern_item pattern, patte
     new->accel_type = pattern.accel_type;
 #endif
     new->case_sensitive = pattern.case_sensitive;
+    new->urlgroup = pattern.urlgroup;
     
     /* not sure whether we need to copy each item in the struct */
     new->cpattern = pattern.cpattern;
@@ -268,6 +348,42 @@ int count_parenthesis (char *pattern)
     return (lcount);
 }
 
+#ifdef DEBUG
+/* Debug helper: print every rule in *plist to stdout (type, pattern,
+ * replacement, urlgroup).  Prints "empty list" when *plist is NULL. */
+void
+print_plist(pattern_item **plist) {
+    pattern_item *curr;
+    int count = 0;
+
+    if (! (*plist)) {
+        printf("empty list\n");
+        return;
+    }
+
+    /* walk the whole list, printing one block per rule */
+    for (curr = *plist; curr; curr = curr->next) {
+        printf("rule %d\n", count);
+        switch( curr->type )
+        {
+            /* NOTE(review): raw type codes; confirm against EXTENDED/ABORT */
+            case 1 : printf( "\tnormal\n" ); break;
+            case 2 : printf( "\textended\n" ); break;
+            case 3 : printf( "\tabort\n" ); break;
+            default : break;
+        }
+        printf("\tpattern \"%s\"\n", curr->pattern );
+        printf("\treplacement \"%s\"\n", curr->replacement );
+        /* urlgroup is NULL for rules without one; %s on NULL is undefined */
+        printf("\turlgroup \"%s\"\n",
+               curr->urlgroup ? curr->urlgroup : "(none)" );
+        count++;
+        printf("\n");
+    }
+}
+#endif
+
+
 void
 plist_destroy(pattern_item **a)
 {
--- a/pattern_list.h
+++ b/pattern_list.h
@@ -39,6 +39,7 @@
 typedef struct _pattern_item {
     char *pattern;
     char *replacement;
+    char *urlgroup;
     int case_sensitive;
     int type;
 #ifdef USE_ACCEL
--- a/rewrite.c
+++ b/rewrite.c
@@ -61,52 +61,72 @@ static int match_accel(char *, char *, i
 
 int
 parse_buff(char *buff, char **url, char **src_addr, char **ident,
-	     char **method, ip_acl *ip, pattern_item *p)
+	     char **method, char **urlgroup, ip_acl *ip, pattern_item *p)
+	     //char **method, ip_acl *ip, pattern_item *p)
 {
     int c, i;
     struct in_addr address;
     char *token, *new_token;
-    char *end[4];
+    char *end[5];
+    //char **urlgroup;
     
     c = 0;
+    *urlgroup = '\0';
+
     token = strchr(buff,' ');
     if ( token ) {       /* URL */
 	c++;
 	*token = '\0';
 	end[0] = token;
 	*url = buff;
+
 	new_token = strchr(++token,' ');
 	if (new_token) {     /* Address */
 	    c++;
 	    *new_token = '\0';
 	    end[1] = new_token;
 	    *src_addr = token;
+
 	    token = strchr(++new_token,' ');
 	    if (token) {      /* Ident */
 		c++;
 		*token = '\0';
 		end[2] = token;
 		*ident = new_token;
-		new_token = strchr(++token,'\n');
+
+		/* this might be the last token, check for a space
+		 or a newline */
+		if (!( new_token = strchr(++token,' ')))
+			new_token = strchr(token,'\n');
 		if (new_token) { /* Method */
 		    c++;
 		    *new_token = '\0';
 		    end[3] = new_token;
 		    *method = token;
-		    /* Squid 2.6 adds Url-group,
-		       this should make jesred backwards-compatible */
-		    new_token = strchr(token,' '); 
-		    if (new_token)
-		    {
-		       *new_token = '\0';
-		       end[3] = new_token;
-		    }
+
+		    /* this will be the last token, stop at a space or newline
+                       to avoid spaces in urlgroup.  may be too rare to
+                       waste a test on */
+		    if (!( token = strchr(++new_token,' ')))
+			token = strchr(new_token,'\n');
+		    if (token) {      /* urlgroup */
+			c++;
+			*token = '\0';
+			end[4] = token;
+			/* squid sends "-" as indicator for no urlgroup  */
+			if (strcmp(new_token,"-"))
+			   *urlgroup = new_token;
 
 		}
 	    }
+
+	    }
 	}
     }
-    if(c != 4) {
+
+    /* 4 pre 2.6 or no urlgroup
+       5 post 2.6 with a urlgroup */
+    if(( c != 5) && ( c != 4)) {
 	for(i = 0; i < c; i++) {
 	    if ( end[i] )
 		*end[i] = ' ';
@@ -114,6 +134,9 @@ parse_buff(char *buff, char **url, char
 	mylog(ERROR, "incorrect input (%d): %s", c, buff);
 	return 1;
     }
+
+    
+
 #ifdef DEBUG
     mylog(DEBG, "Request: %s %s %s %s\n", *url, *src_addr, *ident, *method);
 #endif    
@@ -159,7 +182,7 @@ parse_buff(char *buff, char **url, char
    < 0 if abort pattern match, 0 if no match found, > 1 pattern match
    if match, the number of the matching rule will be returned */
 int
-pattern_compare(char *url,char *newurl, pattern_item *phead)
+pattern_compare(char *url, char *urlgroup, char *newurl, pattern_item *phead)
 {
     pattern_item *curr;
     int pos;
@@ -173,6 +196,16 @@ pattern_compare(char *url,char *newurl,
 	pattern_no++;
 	matched = 1;
 	/* assume a match until a character isn't the same */
+
+	/* urlgroup is checked ONLY IF the rule has one */
+        if (curr->urlgroup) {
+            if (!urlgroup || strcmp(curr->urlgroup, urlgroup) != 0) 
+	    {
+               matched = 0;
+               continue;
+            } 
+	}
+
 	if(curr->type == ABORT) {
 	    len = strlen(curr->pattern);
 	    pos = strlen(url) - len; /* this is dangerous */
--- a/rewrite.h
+++ b/rewrite.h
@@ -28,7 +28,7 @@
 #ifndef REWRITE_H
 #define REWRITE_H
 
-extern int parse_buff(char *, char **, char **, char **, char **,
+extern int parse_buff(char *, char **, char **, char **, char **, char **,
 		      ip_acl *, pattern_item *);
 
 #endif
