Checkpointing Multithreaded Programs
by Christopher D. Carothers and Boleslaw K. Szymanski

Listing One
int checkpoint_waits={0,0,0,0}
pid_t checkpoint_min_pid=0x7fffffff
spinlock_t checkpoint_mm_lock=
  SPIN_LOCK_UNLOCKED
struct m_struct*checkpoint_mm=NULL
  struct_task_lock = SPIN_LOCK_UNLOCKED
struct tast_struct*
  checkpoint_parent_task=
  NULL


Listing Two
/*
 * System-call entry point for checkpointing: hands the caller's register
 * state to do_checkpoint() and passes its result back unchanged.
 */
sys_checkpoint( regs )
{
  return do_checkpoint( regs );
}

do_checkpoint(regs)
{
  admission();
  if( create_mm( regs ) == error )
    return( error );
  clone_threads(regs);
  restore_mm();
  leave();
}


Listing Three
/*
 * admission: gate the calling thread into a checkpoint.
 *
 * Claims checkpoint_mm for the caller's address space (waiting while a
 * different address space holds it), records the smallest pid among the
 * arriving threads, and then waits until checkpoint_waits[0] has reached
 * the expected number of threads.
 */
admission()
{
  /* Remember the address space the thread entered with. */
  old_mm = current->mm;

  /* Only one user of this address space: fork and return the result.
     NOTE(review): `regs` is not a parameter of this function, and the
     visible caller does not use the returned value - confirm how this
     path is meant to end the checkpoint. */
  if( current->mm->mm_users == 1 )
    {
      clone_flags = SIGCHLD;
      return(do_fork(clone_flags, regs));
    }

  spin_lock(&checkpoint_mm_lock);

  /* Wait while a different address space owns checkpoint_mm. The lock is
     released around schedule_timeout(1) and re-taken before re-testing. */
  while( checkpoint_mm != NULL &&
       checkpoint_mm != old_mm )
    {
      spin_unlock(&checkpoint_mm_lock);
      schedule_timeout(1);
      spin_lock(&checkpoint_mm_lock);
    }

  /* First arrival from this address space claims the slot. */
  if( checkpoint_mm == NULL )
    checkpoint_mm = old_mm;

  spin_unlock(&checkpoint_mm_lock);

  /* Number of arrivals to wait for. A thread whose checkpoint_counter is 0
     expects one fewer than mm_users.
     NOTE(review): the reason for the -1 is not evident from this listing;
     checkpoint_counter is raised around do_fork() in clone_threads() -
     confirm the intended accounting. */
  if( current->checkpoint_counter == 0 )
    {
      mm_users = current->mm->mm_users - 1;
    }
  else
    {
      mm_users = current->mm->mm_users;
    }

  spin_lock(&checkpoint_task_lock);

  /* Track the smallest pid; create_mm() compares current->pid against it
     to select the thread that builds the new address space. */
  if( current->pid < checkpoint_min_pid )
    {
      checkpoint_min_pid = current->pid;
    }

  /* Count this thread as arrived. */
  checkpoint_waits[0]++;

  /* Wait until the arrival count reaches mm_users, dropping the lock
     while sleeping so other threads can update the counter. */
  while( checkpoint_waits[0] < mm_users )
    {
      spin_unlock(&checkpoint_task_lock);
      schedule_timeout(1);
      spin_lock(&checkpoint_task_lock);
    }

  spin_unlock(&checkpoint_task_lock);
}


Listing Four
create_mm(regs)
{
  if( current->pid == checkpoint_min_pid )
    {
      checkpoint_parent_task = current;
      parent = current;
      if(( new_mm = allocate_mm() ) == NULL )
      {
        notify_other_threads_of_error;
        reset_ global_variables;
        return(error);
      }

      memcpy(new_mm, parent->mm);
      dup_mmap(new_mm);
      copy_segments(new_mm);
      old_mm = parrent->mm;
      parent->mm := new_mm;
      parent->active_mm = new_mm;
      activate_mm(old_mm, new_mm);
      spin_lock(&checkpoint_task_lock);
      checkpoint_waits[1] = 1;
      spin_unlock(&checkpoint_task_lock);
    }
  else
    {
      spin_lock(&checkpoint_task_lock);

      while( checkpoint_waits[1] == 0 )
      {
        spin_unlock(&checkpoint_task_lock);
        if( parent_detects_error )
          {
            return(error);
          }
        schedule_timeout(1);
        spin_lock(&checkpoint_task_lock);
      }
      spin_unlock(&checkpoint_task_lock);
      parent = checkpoint_parent_task;
      old_mm = current->mm;
      current->mm = parent->mm;
      current->active_mm = parent->mm;
      current->mm->mm_users++;
      activate_mm(old_mm, current->mm);
    }
}


Listing Five
/*
 * clone_threads: fork a copy of the calling thread that shares the
 * current address space, then wait for checkpoint_waits[2] to reach
 * mm_users.
 *
 *   regs - register state handed to do_fork().
 *
 * Returns the do_fork() result so the caller can see the outcome of the
 * fork instead of having it silently dropped.
 */
clone_threads(regs)
{
  /* checkpoint_counter is raised only for the duration of the fork;
     admission() reads it when computing how many threads to expect. */
  current->checkpoint_counter++;
  clone_flags = (CLONE_VM | SIGCHLD);
  retval = do_fork(clone_flags, regs);
  current->checkpoint_counter--;

  spin_lock( &checkpoint_task_lock);

  /* NOTE(review): nothing in these listings increments
     checkpoint_waits[2]; presumably the newly created threads do so
     elsewhere - confirm, and confirm this loop cannot wait forever when
     do_fork() fails. */
  while( checkpoint_waits[2] < mm_users )
    {
      spin_unlock(&checkpoint_task_lock);
      schedule_timeout(1);
      spin_lock(&checkpoint_task_lock);
    }
  spin_unlock( &checkpoint_task_lock);

  return(retval);
}


Listing Six
/*
 * restore_mm: switch the calling thread from the address space it is
 * currently using back to old_mm, and drop its count on the one it leaves.
 *
 *   regs - not used in this listing.
 *
 * NOTE(review): every caller decrements mm_users here, while create_mm()
 * increments it only on the non-parent path, and the decrement is not
 * covered by a lock visible in this listing - confirm the parent's
 * reference and the required synchronization are handled elsewhere.
 */
restore_mm(regs)
{
  /* The mm being abandoned is whatever the thread is running on now. */
  new_mm = current->mm;
  current->mm = old_mm;
  current->active_mm = old_mm;
  new_mm-> mm_users--;
  /* Perform the actual switch only after the task's pointers are updated. */
  activate_mm(new_mm, old_mm);
}


Listing Seven
/*
 * leave: final phase of a checkpoint.
 *
 * Non-parent threads add themselves to checkpoint_waits[3] and return.
 * The parent waits until that counter equals mm_users - 1, then resets
 * the shared checkpoint state so another checkpoint can begin.
 *
 *   regs - not used in this listing.
 */
leave(regs)
{
  if( parent == current )
    {
      spin_lock(&checkpoint_task_lock);

      /* Wait for every other thread to check out, releasing the lock
         while sleeping so they can update the counter. */
      while( checkpoint_waits[3] != mm_users -1 )
        {
          spin_unlock(&checkpoint_task_lock);
          schedule_timeout(1);
          spin_lock(&checkpoint_task_lock);
        }

      /* Reset shared state to its initial values. An array cannot be
         assigned from a brace list, so each counter is cleared
         individually while checkpoint_task_lock is still held. */
      checkpoint_min_pid = 0x7fffffff;
      checkpoint_waits[0] = 0;
      checkpoint_waits[1] = 0;
      checkpoint_waits[2] = 0;
      checkpoint_waits[3] = 0;
      checkpoint_parent_task = NULL;

      /* Release the checkpoint slot last. checkpoint_mm_lock is taken
         while checkpoint_task_lock is held; this is the only place in
         these listings where the two locks nest. */
      spin_lock(&checkpoint_mm_lock);
      checkpoint_mm = NULL;
      spin_unlock(&checkpoint_mm_lock);
      spin_unlock(&checkpoint_task_lock);
    }
  else
    {
      /* Signal the parent that this thread has finished. */
      spin_lock(&checkpoint_task_lock);
      checkpoint_waits[3]++;
      spin_unlock(&checkpoint_task_lock);
    }
}





