[Fusionforge-commits] r7917 - in trunk: gforge/common/include gforge/etc tests/unit/utils

Alain Peyrat aljeux at libremir.placard.fr.eu.org
Thu Jul 2 23:02:50 CEST 2009


Author: aljeux
Date: 2009-07-02 23:02:50 +0200 (Thu, 02 Jul 2009)
New Revision: 7917

Added:
   trunk/tests/unit/utils/TextSanitizerTests.php
Modified:
   trunk/gforge/common/include/TextSanitizer.class.php
   trunk/gforge/etc/local.inc.example
   trunk/tests/unit/utils/AllTests.php
   trunk/tests/unit/utils/UtilsTests.php
Log:
Include HTMLPurifier in TextSanitizer class (with unit tests added)

Modified: trunk/gforge/common/include/TextSanitizer.class.php
===================================================================
--- trunk/gforge/common/include/TextSanitizer.class.php	2009-07-02 18:26:08 UTC (rev 7916)
+++ trunk/gforge/common/include/TextSanitizer.class.php	2009-07-02 21:02:50 UTC (rev 7917)
@@ -2,7 +2,8 @@
 /**
  * FusionForge text sanitisation
  *
- * Copyright 2005, Daniel Perez
+ * Copyright (C) 2005, Daniel Perez
+ * Copyright (C) 2008-2009 Alcatel-Lucent
  *
  * This file is part of FusionForge.
  *
@@ -22,6 +23,30 @@
  * USA
  */
 
+/*
+ * Standard Alcatel-Lucent disclaimer for contributing to open source
+ *
+ * "The Style Sheet ("Contribution") has not been tested and/or
+ * validated for release as or in products, combinations with products or
+ * other commercial use. Any use of the Contribution is entirely made at
+ * the user's own responsibility and the user can not rely on any features,
+ * functionalities or performances Alcatel-Lucent has attributed to the
+ * Contribution.
+ *
+ * THE CONTRIBUTION BY ALCATEL-LUCENT IS PROVIDED AS IS, WITHOUT WARRANTY
+ * OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, COMPLIANCE,
+ * NON-INTERFERENCE AND/OR INTERWORKING WITH THE SOFTWARE TO WHICH THE
+ * CONTRIBUTION HAS BEEN MADE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ALCATEL-LUCENT BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * CONTRIBUTION OR THE USE OR OTHER DEALINGS IN THE CONTRIBUTION, WHETHER
+ * TOGETHER WITH THE SOFTWARE TO WHICH THE CONTRIBUTION RELATES OR ON A STAND
+ * ALONE BASIS."
+ */
+
+require_once($sys_path_to_htmlpurifier . '/HTMLPurifier.auto.php');
+
 Class TextSanitizer extends Error {
 	
 	
@@ -32,7 +57,7 @@
 	 *	@return  string		The output string
 	 */
 	function convertExtendedCharsForEmail($text) {
-		$text = str_replace("&acute;","'",$text); // it's better to see that char in the email than the html entity
+		$text = str_replace("&acute;","'",$text); //it's better to see that char in the email than the html entity
 		$text = str_replace("&amp;","&",$text);
 		$text = str_replace("&quot;",'"',$text);
 		$text = str_replace("&nbsp;",' ',$text);
@@ -41,6 +66,20 @@
 		$text = str_replace("&deg;",'°',$text);
 		$text = str_replace("<br>","\n",$text);
 		$text = str_replace("<br />","\n",$text);
+
+		$text = str_replace("&eacute;","é",$text);
+		$text = str_replace("&egrave;","è",$text);
+		$text = str_replace("&ecirc;","ê",$text);
+		$text = str_replace("&euml;","ë",$text);
+		$text = str_replace("&agrave;","à",$text);
+		$text = str_replace("&acirc;","â",$text);
+		$text = str_replace("&ccedil;","ç",$text);
+		$text = str_replace("&ugrave;","ù",$text);
+		$text = str_replace("&ucirc;","û",$text);
+		$text = str_replace("&uuml;","ü",$text);
+		$text = str_replace("&ocirc;","ô",$text);
+		$text = str_replace("&iuml;","ï",$text);
+
 		return $text;
 	}
 	
@@ -54,6 +93,13 @@
 		$text = str_replace("<br>","\n",$text);
 		$text = str_replace("<br />","\n",$text);
 		$text = str_replace("<br/>","\n",$text);
+		$text = str_replace("<p>","\n",$text);
+		$text = str_replace("</p>","\n",$text);
+		$text = str_replace("<li>","\n - ",$text);
+		$text = str_replace("</li>",'',$text);
+		$text = str_replace("<ul>",'',$text);
+		$text = str_replace("</ul>","\n",$text);
+		$text = str_replace("\xc2\xa0",' ',$text);
 		return $text;
 	}
 	
@@ -102,9 +148,11 @@
                 $input=str_replace('&lt;table&gt;','<table>',$input);
                 $input=str_replace('&lt;table','<table',$input);
                 $input=str_replace('&lt;/table&gt;','</table>',$input);
+                $input=str_replace('&lt;div&gt;','<div>',$input);
                 $input=str_replace('&lt;div','<div',$input);
                 $input=str_replace('&lt;/div&gt;','</div>',$input);
                 $input=str_replace('&lt;u&gt;','<u>',$input);
+                $input=str_replace('&lt;u ','<u ',$input); // rg
                 $input=str_replace('&lt;/u&gt;','</u>',$input);
                 $input=str_replace('&lt;p&gt;','<p>',$input);
                 $input=str_replace('&lt;/p&gt;','</p>',$input);
@@ -128,7 +176,7 @@
                 $input=str_replace('&lt;img ','<img ',$input);
                 $input=str_replace('&lt;textarea ','<textarea ',$input);
                 $input=str_replace('&lt;/textarea&gt;','</textarea>',$input);
-                $input=str_replace('&lt;a href','<a href',$input);
+                $input=str_replace('&lt;a ','<a ',$input);
                 $input=str_replace('&lt;h1&gt;','<h1>',$input);
                 $input=str_replace('&lt;/h1&gt;','</h1>',$input);
                 $input=str_replace('&lt;h2&gt;','<h2>',$input);
@@ -143,15 +191,68 @@
                 $input=str_replace('&lt;/h6&gt;','</h6>',$input);
                 $input=str_replace('&lt;pre&gt;','<pre>',$input);
                 $input=str_replace('&lt;/pre&gt;','</pre>',$input);
+                $input=str_replace('&lt;address&gt;','<address>',$input);
+                $input=str_replace('&lt;/address&gt;','</address>',$input);
                 $input=str_replace('&lt;h1 ','<h1 ',$input);
                 $input=str_replace('&lt;h2 ','<h2 ',$input);
                 $input=str_replace('&lt;h3 ','<h3 ',$input);
                 $input=str_replace('&lt;h4 ','<h4 ',$input);
 		$input=str_replace('&lt;h5 ','<h5 ',$input);
                 $input=str_replace('&lt;h6 ','<h6 ',$input);
-	
-		return $input;
+                $input=str_replace('&rsquo;','\\\'',$input);
+                $input=str_replace('&bull;','-',$input);
+
+                // Allow embedding videos, such as YouTube ones.
+                $input=str_replace('&lt;object ','<object ',$input);
+                $input=str_replace('&lt;/object&gt;','</object>',$input);
+                $input=str_replace('&lt;param ','<param ',$input);
+                $input=str_replace('&lt;/param&gt;','</param>',$input);
+                $input=str_replace('&lt;embed ','<embed ',$input);
+                $input=str_replace('&lt;/embed&gt;','</embed>',$input);
+
+                return $input;
 	}
+
+	function stripTags ($text, $allowed='br,p,li,ul') {
+		$config = HTMLPurifier_Config::createDefault();
+		$config->set('HTML', 'Allowed', $allowed);
+		$purifier = new HTMLPurifier($config);
+		$text = $purifier->purify($text);
+
+		return $text;
+	}
+
+	function purify ($text) {
+		$config = HTMLPurifier_Config::createDefault();
+		//$config->set('HTML','Allowed','a[href|title],strike,sub,span,font,hr,br,tbody,tr,td,table,div,u,p,ul,li,ol,blockquote,em,strong,sup,input,img,textarea,h1,h2,h3,h4,h5,h6,pre,address');
+		$purifier = new HTMLPurifier($config);
+		return $purifier->purify($text);
+	}
+
+	function summarize ($text, $nb_line=4, $truncate=true, $nb_char=145) {
+		$text = $this->stripTags($text);
+		$text = $this->convertNeededTagsForEmail($text);
+		// Remove the carriage returns left by MS Windows (CRLF) line endings
+		$text = preg_replace('/\r/', '', $text);
+		// Collapse each newline plus any following whitespace or blank lines into a single newline
+		$text = preg_replace('/\n[\n\s]*/', "\n", $text);
+		$text = trim($text);
+		$arr = explode("\n", $text);
+		$nb_max = count($arr);
+		if ($nb_max > $nb_line) $nb_max = $nb_line;
+		$summary = '';
+		for ($l = 0; $l < $nb_max; $l++) {
+			$summary .= '<br />';
+			if ($truncate == true && $nb_max < $nb_line && $l == $nb_max - 1) {
+				$nb_char = $nb_char * ($nb_line - $nb_max + 1);
+			}
+			$summary .= util_make_links((($truncate == true && strlen($arr[$l]) > $nb_char) ?
+											preg_replace('/[^\s]*$/', ' <b>...</b>', substr($arr[$l], 0, $nb_char), 1) :
+											$arr[$l]));
+		}
+
+		return $summary;
+	}
 }
 
 // Local Variables:

Modified: trunk/gforge/etc/local.inc.example
===================================================================
--- trunk/gforge/etc/local.inc.example	2009-07-02 18:26:08 UTC (rev 7916)
+++ trunk/gforge/etc/local.inc.example	2009-07-02 21:02:50 UTC (rev 7917)
@@ -324,4 +324,6 @@
 //	'Support Tech'=>array( 'projectadmin'=>'0', 'frs'=>'0', 'scm'=>'0', 'docman'=>'1', 'forumadmin'=>'0', 'forum'=>'1', 'trackeradmin'=>'0', 'tracker'=>'2', 'pmadmin'=>'0', 'pm'=>'0' , 'webcal'=>'2')
 //);
 
+$sys_path_to_htmlpurifier = '/usr/share/htmlpurifier';
+
 // End of customizations -- place nothing after this line

Modified: trunk/tests/unit/utils/AllTests.php
===================================================================
--- trunk/tests/unit/utils/AllTests.php	2009-07-02 18:26:08 UTC (rev 7916)
+++ trunk/tests/unit/utils/AllTests.php	2009-07-02 21:02:50 UTC (rev 7917)
@@ -1,8 +1,10 @@
 <?php
+
+$sys_path_to_htmlpurifier = '/usr/share/htmlpurifier';
+
 require_once 'PHPUnit/Framework.php';
- 
 require_once dirname(__FILE__).'/UtilsTests.php';
-// ...
+require_once dirname(__FILE__).'/TextSanitizerTests.php';
  
 class Utils_AllTests
 {
@@ -11,7 +13,7 @@
         $suite = new PHPUnit_Framework_TestSuite('PHPUnit Framework');
  
         $suite->addTestSuite('Utils_Tests');
-        // ...
+        $suite->addTestSuite('TextSanitizerTests');
  
         return $suite;
     }

Added: trunk/tests/unit/utils/TextSanitizerTests.php
===================================================================
--- trunk/tests/unit/utils/TextSanitizerTests.php	                        (rev 0)
+++ trunk/tests/unit/utils/TextSanitizerTests.php	2009-07-02 21:02:50 UTC (rev 7917)
@@ -0,0 +1,49 @@
+<?php
+
+require_once 'PHPUnit/Framework/TestCase.php';
+require_once dirname(__FILE__) . '/../../../gforge/common/include/Error.class.php';
+require_once dirname(__FILE__) . '/../../../gforge/common/include/TextSanitizer.class.php';
+
+/**
+ * Simple tests for the text sanitizer class.
+ *
+ * @package   Tests
+ * @author    Alain Peyrat <aljeux at free.fr>
+ * @copyright 2009 Alain Peyrat. All rights reserved.
+ * @license   GPL License
+ */
+class TextSanitizerTests extends PHPUnit_Framework_TestCase
+{
+	protected $s;
+	
+	function setUp()
+	{
+		$this->s = new TextSanitizer();
+	}
+
+	/**
+	 * test purify on good code.
+	 */
+	public function testPurifyOnValidHtmlCode()
+	{
+		$this->assertEquals($this->s->purify('<h1>A valid message</h1>'), '<h1>A valid message</h1>');
+		$this->assertEquals($this->s->purify('<h1>A <B>valid</B> message</h1>'), '<h1>A <b>valid</b> message</h1>');
+	}
+	
+	/**
+	 * test purify on repairing damaged code.
+	 */
+	public function testPurifyOnInvalidHtmlCode()
+	{
+		$this->assertEquals($this->s->purify('<h1>Missing ending tag'), '<h1>Missing ending tag</h1>');
+		$this->assertEquals($this->s->purify('Invalid <toto> tag'), 'Invalid  tag');
+	}
+	
+	/**
+	 * test purify on malicious code.
+	 */
+	public function testPurifyOnMaliciousHtmlCode()
+	{
+		$this->assertEquals($this->s->purify('Hacker <script>hello</script>'), 'Hacker ');
+	}
+}

Modified: trunk/tests/unit/utils/UtilsTests.php
===================================================================
--- trunk/tests/unit/utils/UtilsTests.php	2009-07-02 18:26:08 UTC (rev 7916)
+++ trunk/tests/unit/utils/UtilsTests.php	2009-07-02 21:02:50 UTC (rev 7917)
@@ -4,40 +4,38 @@
 require_once dirname(__FILE__) . '/../../../gforge/common/include/utils.php';
 
 /**
- * Simple math test class.
+ * Simple tests for the utils library.
  *
- * @package   Example
- * @author    Manuel Pichler <mapi at phpundercontrol.org>
- * @copyright 2007-2008 Manuel Pichler. All rights reserved.
- * @license   http://www.opensource.org/licenses/bsd-license.php  BSD License
- * @version   Release: 0.4.7
- * @link      http://www.phpundercontrol.org/
+ * @package   Tests
+ * @author    Alain Peyrat <aljeux at free.fr>
+ * @copyright 2009 Alain Peyrat. All rights reserved.
+ * @license   GPL License
  */
 class Utils_Tests extends PHPUnit_Framework_TestCase
 {
-    /**
-     * test the validate_email function.
-     */
-    public function testEmail()
-    {
-	$this->assertTrue(validate_email('al at fx.fr'), 'al at fx.fr is a valid email address');
+	/**
+	 * test the validate_email function.
+	 */
+	public function testEmail()
+	{
+		$this->assertTrue(validate_email('al at fx.fr'), 'al at fx.fr is a valid email address');
 
-	$this->assertFalse(validate_email('al @fx.fr'), 'al @fx.fr is not a valid email address');
+		$this->assertFalse(validate_email('al @fx.fr'), 'al @fx.fr is not a valid email address');
 
-	$this->assertFalse(validate_email('al'), 'al is not a valid email address');
-    }
+		$this->assertFalse(validate_email('al'), 'al is not a valid email address');
+	}
 
-    /**
-     * test the validate_hostname function.
-     */
-    public function testHostname()
-    {
-	$this->assertTrue(valid_hostname('myhost.com'), 'myhost.com is a valid hostname.');
+	/**
+	 * test the valid_hostname function.
+	 */
+	public function testHostname()
+	{
+		$this->assertTrue(valid_hostname('myhost.com'), 'myhost.com is a valid hostname.');
 
-	$this->assertTrue(valid_hostname('myhost.com.'), 'myhost.com. is a valid hostname.');
+		$this->assertTrue(valid_hostname('myhost.com.'), 'myhost.com. is a valid hostname.');
 
-	$this->assertFalse(valid_hostname('my host.com'), 'my host.com is not a valid hostname');
+		$this->assertFalse(valid_hostname('my host.com'), 'my host.com is not a valid hostname');
 
-	$this->assertFalse(valid_hostname('O at O'), 'O at O is not a valid hostname');
-    }
+		$this->assertFalse(valid_hostname('O at O'), 'O at O is not a valid hostname');
+	}
 }




More information about the Fusionforge-commits mailing list