Wednesday, September 17, 2008

Documenting a MySQL database

Today; a quick script I whipped together to create a nice HTML page out of a

mysqldump --no-data
output. Use it from the command line, and give the mysqldump filename as argument. The output is sent to stdout.

bash# php document_mysql_dump.php my_db_dump.sql >my_db_documentation.html

Have fun!


<?
/**
 * MySQL table definition documenting script.
 * Converts mysqldump files to HTML documentation.
 *
 * Freely Available For Now
 *
 * Author: mrOrigo (mrorigo@gmail.com) 2008-09-17
 *
 */

if($argc != 2)
  die(
"ERROR: Need one argument; the MySQL Dump file\n");
?>
<html>
<head>
<style type="text/css">
body { font-family: Verdana, Arial; font-size: 0.75em;}
table {     font-size: 1.0em;}

table.table_columns {}
table.table_columns tr.columns { background: #eeeeee;}
table.table_columns tr.columns td.name { background: #dddddd;}
table.table_columns tr.columns td.extra { color: #555555; background: #dfdfdf;}

table.table_keys {}
table.table_keys tr.keys { background: #eeeeee;}
table.table_keys tr.keys td.name { background: #dddddd;}

table.table_constraints {}
table.table_constraints tr.constraints { background: #eeeeee;}
table.table_constraints tr.constraints td.name { background: #dddddd;}

table.table_extras {}
table.table_extras tr.extras { background: #eeeeee;}
table.table_extras tr.extras td.value { background: pink; }
</style>
</head>
<body>
<?
$file 
$argv[1];
$text file_get_contents($file);
if(!
$text)
  die(
"File not found: $argv[1]\n");

$nm preg_match_all("/CREATE TABLE `(.*?)` \(.(.*?) (ENGINE=.*?);/ms"$text$matches);

$tables = Array();
for(
$i=0$i<$nm$i++) {
  
$definition $matches[2][$i];

  
// "Cheat" the spaces in comments to be &nbsp;, for a few regexps to work
  
$definition preg_replace_callback("/COMMENT '(.*)'/im"
                      
create_function('$a',
                              
'return str_replace(" ", "&nbsp;", $a[0]);'),
                      
$definition);
  
$tables[] = Array("name" => $matches[1][$i],
            
"definition" => parseDefinition($definition),
            
"extra" => $matches[3][$i]);
}

foreach(
$tables as $table) {
  print 
"<p>";
  print 
"<a name='table_".$table["name"]."'>";
  print 
"<h3>Table \"".$table["name"]."\"</h3></a>";
  print 
"<b>Definition</b>";

  print 
"<table cellspacing='0' cellpadding='2' border='1' class='table_columns'>";
  print 
"<tr><th>Column name</th><th>Column type</th><th>Size/Options</th><th>Signed</th><th>Extra</th></tr>";
  foreach(
$table["definition"]["columns"] as $cname => $c)
    print 
"<tr class='columns'><td class='name'>$c[name]</td><td class='type'> $c[type]</td><td class='size'> ".($c["size"]?$c["size"]:"&nbsp;")."</td><td class='signed'> ".($c["signed"] ? $c["signed"] : "&nbsp;"). "</td><td class='extra'> $c[extra]</td></tr>";
  print 
"</table>";

  print 
"<b>Indices</b>";
  print 
"<table cellspacing='0' cellpadding='2' border='1' class='table_keys'>";
  print 
"<tr><th>Index name</th><th>Columns</th></tr>";
  foreach(
$table["definition"]["keys"] as $kname => $k)
    print 
"<tr class='keys'><td class='name'>$k[name]</td><td class='columns'>$k[columns]</td></tr>";
  print 
"</table>";

  if(
count($table["definition"]["constraints"]) > 0) {
    print 
"<b>Constraints</b>";
    print 
"<table cellspacing='0' cellpadding='2' border='1' class='table_constraints'>";
    print 
"<tr><th>Name</th><th>Column</th><th>Foreign Table</th><th>Foreign Column</th><th>Extra</th></tr>";
    foreach(
$table["definition"]["constraints"] as $kname => $k) {
      
$ftlink "<a href='#table_".$k["foreignTable"]."'>".$k["foreignTable"]."</a>";
      print 
"<tr class='constraints'><td class='name'>$k[name]</td><td class='column'>$k[column]</td><td class='foreign_table'>$ftlink</td><td class='foreign_column'>$k[foreignColumn]</td><td class='extra'>$k[extra]</td></tr>";
    }
    print 
"</table>";
  }
  print 
"<b>Table extras</b>";
  print 
"<table cellspacing='0' cellpadding='2' border='1' class='table_extras'>";
  
$extras parseTableExtras($table["extra"]);
  foreach(
$extras as $en => $ev)
    print 
"<tr class='extras'><td class='name'>$en</td><td class='value' colspan=3>$ev</td></tr>";
  print 
"</table>";
}


function 
parseTableExtras($extra)
{
  
$extra preg_replace_callback("/COMMENT='(.*)'/im"
                 
create_function('$a',
                         
'return str_replace(" ", "&nbsp;", $a[0]);'),
                 
$extra);
  
// Split out the extras
  
preg_match_all("/(ENGINE|AUTO_INCREMENT|DEFAULT CHARSET|COMMENT)=([a-zA-Z0-9_'&;!\?]*)/"$extra$matches);  

  
// Make into usable array
  
$extras = Array();
  for(
$i=0;$i<count($matches[0]);$i++)
    
$extras[$matches[1][$i]] = str_replace("&nbsp;"" "$matches[2][$i]);
  return 
$extras;
}

function 
parseDefinition($definition)
{
  
$mysql_types = Array("bit",          "bool",
               
"boolean",      "tinyint",
               
"smallint",      "mediumint",
               
"int",          "integer",
               
"bigint",      "serial",
               
"float",          "double",
               
"decimal",
               
"datetime",        "date",
               
"timestamp",       "time",  "year",
               
"blob",          "char",
               
"varchar",      "enum",
               
"set",          "binary",
               
"varbinary",      "tinyblob",
               
"tinytext",      "blob",
               
"text",          "mediumtext",
               
"mediumblob",      "longtext",
               
"longblob");

  
// Split into columns, keys and constraints
  
preg_match("/^(.*?)(PRIMARY?\ ?KEY.*?)(CONSTRAINT.*)/si"
         
$definition,
         
$matches);
  if(!
$matches[3])  // Some tables don't have constraints..
        
preg_match("/^(.*?)(PRIMARY?\ KEY.*)(CONSTRAINT.*)?/si"
           
$definition,
           
$matches);
  
$columns $matches[1];
  
$keys $matches[2];
  
$constraints $matches[3];

  
// Split the columns
  
preg_match_all("/(.*)\s+(".join("|",$mysql_types).")\(?([a-zA-Z0-9',]*?)\)?\s+(unsigned)?\s?([^,]*)/Si",
         
$columns,
         
$matches);
  
$_c=Array("names" => &$matches[1],
        
"types" => &$matches[2],
        
"sizes" => &$matches[3],
        
"signed" => &$matches[4],
        
"extra" => &$matches[5]);
  
// Transform columns into a more usable array
  
$columns = Array();
  for(
$i=0$i<count($_c); $i++) {
    
$n trim($_c["names"][$i], " `");
    if(
$n != "")
      
$columns[$n] = Array("type" => $_c["types"][$i],
               
"name" => $n,
               
"size" => $_c["sizes"][$i],
               
"signed"=> $_c["signed"][$i],
               
"extra" => $_c["extra"][$i]);
  }

  
// Parse and tidy up keys
  
preg_match_all("/(PRIMARY)?.*?(KEY).*?(.*)?(\(.*\))(.*)/"$keys$matches);
  
$keys = Array();
  for(
$i=0$i<count($matches[0]); $i++) {
    
$n trim($matches[3][$i]," (`)");
    if(
$n == "")
      
$n "PRIMARY";
    
$keys$n ] = Array("name" => $n,
            
"columns" => trim($matches[4][$i], " (`)"));
      };

  
// Parse and tidy up constraints
  
preg_match_all("/(CONSTRAINT)\s+(.*)\s+FOREIGN KEY \((.*)\)\s+REFERENCES\s+(.*)\s+\((.*)\)\s+([^,\)]*)/"$constraints$matches);
  
$constraints = Array();
  for(
$i=0$i<count($matches[0]); $i++) {
    
$n trim($matches[2][$i]," (`)");
    
$constraints$n ] = Array("name" => $n,
                   
"column" => trim($matches[3][$i]," (`)"),
                   
"foreignTable" => trim($matches[4][$i]," (`)"),
                   
"foreignColumn" => trim($matches[5][$i]," (`)"),
                   
"extra" => $matches[6][$i]);
  }

  return Array(
"columns" => $columns,
           
"keys" => $keys,
           
"constraints" => $constraints);
}

?>

Monday, March 17, 2008

All User Input Is Malicious!

If you, like me nowadays, obide by the above statement, you have a much better chance to avoid monday morning calls telling you someone "hacked" your website.

I just got off the phone with someone that has been developing websites for years and years, have multinational corporations as customers, and with serious problems with input validation. In just 5 minutes over the phone, I could access data from several of the websites he had made in a way their data was not supposed to be accessed, simply by inputing malicious data in a few form fields. He was ofcourse chocked, not the monday morning he had expected, but nevertheless he learnt his lesson and started working through the code of his most important customers.

All of this made me think: "How many web site developers with 10 years or more
in the business has the same problem". Normal reasoning and multiplication made
this thought send chills down my spline..

So, for what it's worth, I give you my contribution to safe-up the web a little
bit. It's a variation of the code I have used for several projects to validate
input in PHP. I hope you find it useful, and that you implement it, or
something similar in your projects.


<?php

// Defines used as $method parameter to getPP()
define('PP_GET'1);
define('PP_POST'2);
define('PP_GET_POST'3);
define('PP_POST_GET'4);

function 
getPP($name$format$method PP_GET)
{
  unset(
$first);
  unset(
$second);

  switch (
$method) {
  case 
PP_GET:
    
$first $_GET;
    break;
  case 
PP_POST:
    
$first $_POST;
    break;
  case 
PP_GET_POST:
    
$first $_GET;
    
$second $_POST;
    break;
  case 
PP_POST_GET:
    
$first $_POST;
    
$second $_GET;
    break;
  default:
    
// This function should LOG (& Send Email)
    
internalError("getPP(): Invalid parameter method: $method");
    break;
  }
  if (!isset(
$first))
    
internalError("getPP(): Sanity check failed");

  if (isset(
$first[$name]))
    
$var $first[$name];
  else if (isset(
$second) && isset($second[$name]))
    
$var $second[$name];
  if(isset(
$var) && $format) {
    if(!
checkAttribute($format$var))
      
$var false;
  }

  if(!isset(
$var) || $var == "")
    unset(
$var);

  return @
$var;
}

function 
checkAttribute($name$value)
{
  
// List of known attribute types ($name)
  
$allowedAttributes =
    Array(
"username"        => '^([+~!#\"\ 0-9a-zA-Z_-])*$',
      
"parameter" => '^[A-Za-z 0-9_\.\-]+$',
      
"page"            => '^[a-z_0-9]+$',
      
"email"           => '^[A-Za-z0-9]+[A-Za-z0-9_\.-]*@([a-z0-9]+([\.-][a-z0-9]+)*)\.[a-z]{2,4}$',
      
"common"          => '^[A-Za-z 0-9åäöÅÄÖ\.,/\-_]+$',
      
"password"        => '^([A-Za-z0-9_@!\*&#?\.,-_]){3,}$',
    );
  
$regexp "~".str_replace("~""\\~"$allowedAttributes[$name]) . "~";
  
$regexp utf8_encode($regexp);
  if(
preg_match($regexp$value)!=0) {
    return 
TRUE;
  } else {
    return 
FALSE;
  }
}
?>