#! /usr/bin/perl -w
#
# machid.pl -- create an anonymous hash unique for this machine.
#
# Copyright 2005, jw@suse.de.
# Distribute under GPL. No warranties. It may fail any time.
#
# Algorithm:
# Read several machine specific data, ignore part of these, and return the half of the MD5 
# digest of the rest.
#
# The data we read are:
#  * macadresses,
#  * /proc/cpuinfo (those parts that don't change over time).
#  * cpuid (if we can).
#  * hard disk serial numbers (if found).
#
# From this data we return a md5 hash.
#
# We drop information at every stage, in order to frustrate any attempt of
# reconstruction of the input. The ID shall only serve one purpose:
# If two IDs are identical, it is reasonable to assume they come from the same machine.
# In the first stage (reading data), we drop 2 out of 8 bytes or more.
# in the second stage (print data), we use a digest that is believed to be nonreverible.
# and print only half of the digest.
#
# We only use hardware that can be assumed to be constant over a longer time.
# We are easily fooled by removing/adding usb-etherent devices, as we account for all
# mac-addressses, though.
#
# To counter this, we write the hash to file and return the hash from there if 
# the file is found.
# Thus the hash survives a hardware change OR a fresh install (= file not found),
#  but not both simultaneously.
#
# We are resistant against hanging in device-reads: 
# We wait for completion at most 5 seconds, then we automatically background.
#
# Dependencies:
# Hard: perl.rpm(/usr/bin/perl, Digest::MD5, POSIX)
# Soft: iproute2.rpm(/sbin/ip), net-tools.rpm(/sbin/ifconfig), 
#       util-linux.rpm(/bin/mount), module-init-tools.rpm(/sbin/modprobe)
#       /proc/cpuinfo, /var/lib/hardware/unique-keys/*
#
#
# $Id: machid_hash,v 1.1.2.1 2005/04/20 10:42:49 ma Exp $
##########################################################
# 2005-04-05, jw -- initial draft
# 2005-04-07, jw -- Version 1.0 fully functional.
#

use POSIX;		# comes at no cost, it is part of perl
use Digest::MD5;	# comes at no cost, it is part of perl
use Config;		# comes at no cost, it is part of perl

no locale;		# Don't get fooled by any locale settings ...
$ENV{LC_ALL} = 'C';	# For the things we fork.
$ENV{LANG} = 'C';	# Do you know what 'Sendewarteschlangenlnge' is?

die "$0: must be root.\n" if $> and !defined $ENV{MACHID_NONROOT};

my $machid_file 	= $ENV{MACHID_FILE} || "/var/lib/YaST2/machid";
my $min_len_data 	= 50;
my $min_len_digest 	= 16;

my $archname		= $Config{archname};
my $arch_pc  = 1 if $archname =~ m{^(x86|i\d86)}i;
my $arch_ipf = 1 if $archname =~ m{^(ia64)}i;

if (-f $machid_file && -s _ >= $min_len_digest)
  {
    if (open IN, "<$machid_file")
      {
	my $digest = <IN>;
	close IN;

	chomp $digest;
	print "read $machid_file: $digest\n" if $ENV{MACHID_VERBOSE};
	if (length($digest) >= $min_len_digest)
	  {
	    print "$digest\n";
	    exit 0;
	  }
      }
    else
      {
	warn "cannot read $machid_file: $!\n";
      }
  }

if (my $childpid = fork())
  {
    for (1..5)
      {
        sleep(1);
	last if POSIX::waitpid($childpid, POSIX::WNOHANG);
      }
    exit 0;
  }

# we are in a backgrounded child process now.

# Do we need to fully daemonize?
# If so we cannot print the hash befor exiting.
POSIX::setsid();

my $macaddr    = read_macaddr();
my $cpuinfo    = read_cpuinfo();
my $cpuid      = read_cpuid();
my $bioscsum   = read_bioscsum();
my $diskserial = read_diskserial();

if ($ENV{MACHID_VERBOSE})
  {
    print STDERR "macaddr:    $macaddr\n";
    print STDERR "cpuinfo:    $cpuinfo\n";
    print STDERR "cpuid:      $cpuid\n";
    print STDERR "bioscsum:   $bioscsum\n";
    print STDERR "diskserial: $diskserial\n";
  }

## take most things twice, except $cpuinfo, which is usually long already
my $all = $macaddr.$macaddr.$cpuinfo.$cpuid.$cpuid.$bioscsum.$bioscsum.$diskserial.$diskserial;

if (length $all < $min_len_data+$min_len_data)
{
  my $l = length $all;
  die "data length $l < 2*min_len_data=$min_len_data\n" if $ENV{MACHID_VERBOSE};
  exit 2;
}

my $digest = Digest::MD5::md5_hex($all);
$digest = $1 if $digest =~ m{.{8}(.{16})};		# pick the middle bytes;
$digest .= sprintf "%02x", unpack "%8C*", $digest;	# append checksum byte

if (open (OUT, ">$machid_file"))
  {
    print OUT "$digest\n";
    close OUT or warn "write $machid_file failed: $!\n";
  }
else
  {
    warn "$machid_file: write failed: $!\n";
  }

print "$digest\n";
exit 0;
###############################################

sub read_macaddr
{
  # We can use either /sbin/ifconfig or /sbin/ip to collect the mac addresses.
  # there may be multiple adresses, but this is okay. Even if your laptop
  # has its ethernet behind an US dongle, you are likely to use the same USB dongle
  # when you connect.

  my @text;
  if (-x "/sbin/ip")
    {
      open I, '-|', '/sbin/ip addr show' or return '';
      while (defined (my $line = <I>))
        {
	  # capture all but the third lowest hex char.
	  push @text, $1.$2 if $line =~ m{\slink/ether\s+([\d:a-f]{13})..(..)}i;
	}
      close I;
    }
  elsif (-x "/sbin/ifconfig")
    {
      open I, '-|', '/sbin/ifconfig -a' or return '';
      while (defined (my $line = <I>))
        {
	  # capture all but the third lowest hex char.
	  push @text, $1.$2 if $line =~ m{\shwaddr\s+([\d:a-f]{13})..(..)}i;
	}
      close I;
    }
  else
    {
      # oops, no bits today.
    }

  my $text = join '', sort @text;
  $text =~ s{:}{}g;
  return lc $text;
}

sub read_cpuinfo
{
  ## unlikely we ever need to mount, but good to know that we can.
  system "/bin/mount -n -t proc proc /proc" unless -r "/proc/cpuinfo";
  open I, '<', "/proc/cpuinfo" or return '';

  my $cpu_count = 0;
  my %text;
  while (defined (my $line = <I>))
    {
      # no need to throw awy any bits here, this is only classification
      # cpu|revision|motherboard|clock	are available on ppc
      # cpu family|model|model name|stepping|cache size|vendor_id are on i386
      #
      $text{$2}++ if $line =~ m{(cpu family|family|cpu|revision|motherboard|clock|model|model name|stepping|cache size|vendor_id|vendor)\s+:\s+(.*)};
      $cpu_count++ if $line =~ m{processor\s+:};
    }
  close I;
    
  my $text = join '', sort keys %text;
  $text =~ s{:}{}g;
  $text .= "*$cpu_count" if $cpu_count > 1;
  return lc $text;
}

sub read_cpuid
{
  ## Processor ID: 078bfbff00000f40
  ##   corresponds to
  ## 0000000: 0100 0000 4175 7468 6341 4d44 656e 7469  ....AuthcAMDenti
  ## 0000010: 400f 0000 2408 0000 0000 0000 fffb 8b07  @...$...........
  ##
  ## Processor ID: 0000005500000683
  ##   corresponds to
  ## 0000000: 0200 0000 4765 6e75 6e74 656c 696e 6549  ....GenuntelineI
  ## 0000010: 8306 0000 0200 0000 0000 0000 fff9 8303  ................
  ## 0000020: 0101 0203 0000 0000 0000 0000 8208 040c  ................
  ## 
  ## so we better do not try to bring /dev/cpu and hwinfo into a common format...

  if (open I, '-|', '/usr/sbin/hwinfo --bios')
    {
      my @text;
      while (defined (my $line = <I>))
        {
	  push @text, $1.$2 if $line =~ m{Processor ID: 0x([a-f\d]{12})..(..)}i;
	}
      close I;
      return lc join '', sort @text if @text;
    }

  # modprobe -q is not quiet. -s helps. Sorry for polluting syslog.

  return '' unless $arch_pc and -r "/dev/cpu/0/cpuid";
  open I, '<', "/dev/cpu/0/cpuid" or system "/sbin/modprobe -s cpuid";
  open I, '<', "/dev/cpu/0/cpuid" or return '';
  my $buffer = '';
  read(I, $buffer, 48);
  close I;
#  system "/sbin/rmmod cpuid";	# more dangerous if we do, or if we don't ?

  return '' unless length($buffer) == 48;
  my $text = unpack 'H96', $buffer;
  $text =~ s{..(............)..}{$1}g;	# drop 2 out of 8 bytes
  return $text;
}

sub read_diskserial
{
  ## hwinfo --fast --disk may hang when usb-scsi devices are present on 9.3
  ## hwscan --list --disk is not available on SLES8
  ## /var/lib/hardware/unique-keys/* is always present.

  my $keys_dir = "/var/lib/hardware/unique-keys/";
  opendir DIR, $keys_dir or return '';
  my @f = grep { !/^\./ } readdir DIR;
  closedir DIR;

  my %text;
  for my $f (@f)
    {
      if (open I, "<$keys_dir/$f")
        {
	  my $isadisk = 0;
	  my $removable = 0;	
	  my $serial = undef;
	  while (defined (my $line = <I>))
	    {
	      chomp $line;
	      $isadisk++   if $line =~ m{HWClass=disk}i;
	      $removable++ if $line =~ m{Drivers=usb}; 		# sles8 has no Drivers=, 

	      # ignore first and second-last character of serial-no.
	      $serial = $1.$2 if $line =~ m{Serial=.(\S+).(\S)};
	    }
	  close I;

	  # Use a hash to protect against duplicate keys.
	  # Fill in late to make order of lines irrelelvant.
	  $text{$serial}++ if $isadisk and !$removable;
	}
    }
  return lc join '', sort keys %text;
}


sub read_bioscsum
{
  if ($arch_pc)
    {
      ## Steffen says, bios is always mapped at /dev/mem, 0xf0000 to 0xf8000
      open I, "</dev/mem" or return '';
      unless (sysseek I, 0xf0000, 0 == 0xf0000 ) { close I; return ''; }
      my $buf = '';
      sysread I, $buf, 0x800;
      close I;
      return '' unless length $buf == 0x800;
      return sprintf "%08x", unpack "%32C*", $buf;
    }

  if ($arch_ipf)
    {
      my $firmware_version = "/proc/pal/cpu0/version_info";
      my %text;
      open I, "<$firmware_version" or return '';
      while (defined (my $line = <I>))
        {
	  $text{$1}++ if $line =~ m{:\s+([\d\.]+)};
	}

      my $text = join '', sort keys %text;
      $text =~ s{\.}{}g;	# no dots
      $text =~ s{.(.)$}{$1};	#ignore second last byte.
      close I;
      return $text;
    }

  return '';	# unknown arch, better don't touch
}
