Menu

GoogleSheet - Check duplicates

We've created a tool that allows you to merge lines from your Bulk files without conflict, and to point out lines with conflicts so you can resolve them manually.

The tutorial

To set it up, follow the steps below:

[🎥 video coming soon]

  • Go to your Bulk SEO file on Google Sheet
  • Click on the "Extensions" menu
  • Click on the "App Script" menu
  • A new window opens.
  • Copy and paste the code below into the window
function onOpen() {
  var ui = SpreadsheetApp.getUi();
  ui.createMenu('Tools EdgeSEO')
      .addItem(' Check doublons', 'showForm')
      .addToUi();
}

function showForm() {
  var htmlOutput = HtmlService.createHtmlOutputFromFile('form')
      .setWidth(700)
      .setHeight(500);
  SpreadsheetApp.getUi().showModalDialog(htmlOutput, 'Tool check doublons');
}

function checkAndMergeDuplicatesStep2(sheetName, emailAddresses) {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
  if (!sheet) {
    SpreadsheetApp.getUi().alert('Sheet not found: ' + sheetName);
    return;
  }

  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var spreadsheetUrl = spreadsheet.getUrl();
  var sheetId = sheet.getSheetId();
  var sheetUrl = spreadsheetUrl + '#gid=' + sheetId;
  
  var data = sheet.getDataRange().getValues();
  var headers = data[0].map(function(header) { return header.replace(/<|>/g, ''); }); // Enlever les chevrons des noms de colonnes
  var totalRows = data.length - 1; // Subtract 1 for header row
  var urlColumnIndex = 0; // Column A
  var duplicateUrls = {};
  var mergedRows = [];
  var originalIndices = {};

  Logger.log('Total rows (excluding header): ' + totalRows);

  // Identify duplicates and store original indices
  for (var i = 1; i < data.length; i++) {
    var url = data[i][urlColumnIndex];
    if (url) {
      if (duplicateUrls[url]) {
        duplicateUrls[url].push(i);
      } else {
        duplicateUrls[url] = [i];
      }
      if (!originalIndices[url]) {
        originalIndices[url] = [];
      }
      originalIndices[url].push(i + 1); // Store 1-based index
    }
  }

  var duplicateCount = 0;
  for (var url in duplicateUrls) {
    if (duplicateUrls[url].length > 1) {
      duplicateCount++;
    }
  }
  Logger.log('Number of duplicate URLs: ' + duplicateCount);

  var rowsToDelete = [];
  var conflictRows = [];
  var conflictLogs = [];

  // Process each set of duplicates
  for (var url in duplicateUrls) {
    if (duplicateUrls[url].length > 1) {
      var baseRowIdx = duplicateUrls[url][0];
      var baseRow = data[baseRowIdx].slice(); // Copy the base row
      var mergedLines = [originalIndices[url][0]]; // Use original index

      for (var j = 1; j < duplicateUrls[url].length; j++) {
        var currentRowIdx = duplicateUrls[url][j];
        var currentRow = data[currentRowIdx];
        var canMerge = true;
        var conflictColumns = [];

        // Check for conflicts in columns B to S (indices 1 to 18)
        for (var col = 1; col <= 18; col++) {
          if (baseRow[col] && currentRow[col] && baseRow[col] !== currentRow[col]) {
            canMerge = false;
            conflictColumns.push(headers[col]); // Store header name for column
          }
        }

        // Merge if no conflict
        if (canMerge) {
          for (var col = 1; col <= 18; col++) {
            if (!baseRow[col] && currentRow[col]) {
              baseRow[col] = currentRow[col];
            }
          }
          rowsToDelete.push(currentRowIdx);
          mergedLines.push(originalIndices[url][j]); // Use original index
          Logger.log('Merged row ' + originalIndices[url][j] + ' into row ' + originalIndices[url][0]);
        } else {
          conflictRows.push({url: url, lines: originalIndices[url][0] + ' / ' + originalIndices[url][j], conflicts: conflictColumns.join(' / ')});
          var conflictLog = 'Conflit trouvé entre les lignes ' + originalIndices[url][j] + ' et ' + originalIndices[url][0] + ' pour les colonnes ' + conflictColumns.join(' / ');
          conflictLogs.push(conflictLog);
          Logger.log(conflictLog);
        }
      }
      if (mergedLines.length > 1) {
        mergedRows.push({url: url, lines: mergedLines.join(' / ')});
      }
      
      // Update the base row in the sheet
      for (var col = 1; col <= 18; col++) {
        sheet.getRange(baseRowIdx + 1, col + 1).setValue(baseRow[col]); // +1 for 1-based index, +1 to skip URL column
      }
    }
  }

  // Sort rows to delete in descending order
  rowsToDelete.sort(function(a, b) { return b - a; });

  // Delete rows
  for (var k = 0; k < rowsToDelete.length; k++) {
    sheet.deleteRow(rowsToDelete[k] + 1); // Adjust for header row
  }
  
  Logger.log('Rows deleted: ' + rowsToDelete.length);

  // Refresh data after deletions
  data = sheet.getDataRange().getValues();

  // Create and send the report
  var mergedRowsTable = '<table border="1" style="width: 100%; border-collapse: collapse;"><tr><th style="width: 50%; background-color: #34495e; color: white; text-align: left; padding: 8px;">URL</th><th style="width: 50%; background-color: #34495e; color: white; text-align: left; padding: 8px;">Lignes fusionnées</th></tr>';
  for (var m = 0; m < mergedRows.length; m++) {
    var rowColor = (m % 2 == 0) ? '#FFF' : '#ecf0f1';
    mergedRowsTable += '<tr style="background-color: ' + rowColor + '; height: 30px;"><td style="padding: 8px;">' + mergedRows[m].url + '</td><td style="padding: 8px;">' + mergedRows[m].lines + '</td></tr>';
  }
  mergedRowsTable += '</table>';
  
  var conflictRowsTable = '<table border="1" style="width: 100%; border-collapse: collapse;"><tr><th style="width: 50%; background-color: #34495e; color: white; text-align: left; padding: 8px;">URL</th><th style="width: 50%; background-color: #34495e; color: white; text-align: left; padding: 8px;">Ligne avec des conflits</th></tr>';
  for (var c = 0; c < conflictRows.length; c++) {
    var rowColor = (c % 2 == 0) ? '#FFF' : '#ecf0f1';
    var updatedRowIndices = conflictRows[c].lines;
    conflictRowsTable += '<tr style="background-color: ' + rowColor + '; height: 30px;"><td style="padding: 8px;">' + conflictRows[c].url + '</td><td style="padding: 8px;">L.' + updatedRowIndices + ' - ' + conflictRows[c].conflicts + '</td></tr>';
  }
  conflictRowsTable += '</table>';

  var now = new Date();
  var formattedDate = Utilities.formatDate(now, Session.getScriptTimeZone(), 'dd/MM/yy - HH:mm');
  var emailSubject = 'EdgeSEO - ' + formattedDate + ' - Rapport Check Doublons';
  var emailBody = '<p>Bonjour 👋,</p>';
  emailBody += '<p>Voici le rapport du fichier : <a href="' + sheetUrl + '">' + spreadsheet.getName() + ' - ' + sheetName + '</a>.</p>';
  emailBody += '<h2>Rapport des lignes fusionnées</h2><h3>Lignes fusionnées</h3>' + mergedRowsTable + '<h3>Ligne avec des conflits</h3>' + conflictRowsTable;

  MailApp.sendEmail({
    to: emailAddresses,
    subject: emailSubject,
    htmlBody: emailBody
  });
}

function processForm(formObject) {
  var sheetName = formObject.sheetName;
  var emailAddresses = formObject.emailAddresses;
  checkAndMergeDuplicatesStep2(sheetName, emailAddresses);
}

  • Click on the save icon 💾 to save the code
  • Click on the + button to the right of the Files menu
  • Select HTML from the
  • Rename the "Untitled" file to "form".
  • Copy and paste the code below into the form.html file
<!DOCTYPE html>
<html>
<head>
  <base target="_top">
  <style>
    body, p {
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 0;
      background-color: #FFF;
    }

    p {
      margin-bottom: 20px;
    }

    .container {
      max-width: 700px;
      margin: auto;
      background: #fff;
      padding: 20px;
    }

    label {
      display: block;
      margin-bottom: 8px;
      color: #333;
      font-weight: bold;
    }
    input[type="text"], select {
      width: 100%;
      padding: 10px;
      margin-bottom: 20px;
      border: 1px solid #ccc;
      border-radius: 4px;
      box-sizing: border-box;
    }
    input[type="button"] {
      width: 100%;
      background-color: #34495e;
      color: white;
      padding: 14px 20px;
      margin: 8px 0;
      border: none;
      border-radius: 4px;
      cursor: pointer;
    }
    input[type="button"]:hover {
      background-color: #2c3e50;
    }
    .confirmation {
      display: none;
      text-align: center;
      margin-top: 20px;
      color: #333;
      font-size: 16px;
    }
    .close-button {
      width: auto;
      background-color: #e74c3c;
      color: white;
      padding: 14px 20px;
      margin: 8px 0;
      border: none;
      border-radius: 4px;
      cursor: pointer;
    }
    .close-button:hover {
      background-color: #c0392b;
    }
  </style>
</head>
<body>
  <div class="container" id="formContainer">
    <p id="infoText">L'outil permet de trouver les URLs en double dans la colonne url de l'app « Bulk SEO » et les fusionner s'il n'y a pas de conflit de données.</p>
    <form id="mergeForm">
      <label for="sheetName">Nom de la feuille à tester :</label>
      <input type="text" id="sheetName" name="sheetName" placeholder="Exemple : Prod BulkSEO "><br>
      <label for="emailAddresses">Votre e-mail :</label>
      <input type="text" id="emailAddresses" name="emailAddresses" placeholder="Exemple : coucou@email.fr"><br>
      <input type="button" value="Valider" onclick="submitForm()">
    </form>
    <div class="confirmation" id="confirmationMessage">
      <strong>C'est fait 😄</strong><br /><br />
      Vous recevrez un rapport avec les lignes qui ont été fusionnées et celles avec des conflits à résoudre manuellement.<br /><br />
      <input type="button" class="close-button" value="Fermer" onclick="closeDialog()">
    </div>
  </div>
  <script>
    function submitForm() {
      var formObject = {
        sheetName: document.getElementById('sheetName').value,
        emailAddresses: document.getElementById('emailAddresses').value
      };
      google.script.run.withSuccessHandler(showConfirmation).processForm(formObject);
    }

    function showConfirmation() {
      document.getElementById('mergeForm').style.display = 'none';
      document.getElementById('infoText').style.display = 'none';
      document.getElementById('confirmationMessage').style.display = 'block';
    }

    function closeDialog() {
      google.script.host.close();
    }
  </script>
</body>
</html>

  • Click on the save icon 💾 to save the code
  • Return to your Google Sheet file
  • Refresh page
  • A new "Tool Edge SEO" menu appears after the Help menu
  • Click on it
  • Then click on the "Check doublon" tab
  • Confirm authorization by clicking OK, then select your Google account to authorize the script to run.
  • Click again on the "Tool Edge SEO" menu > "Check doublon".
  • A pop-up window opens
  • Enter the name of the sheet you wish to process
  • Enter your e-mail address to receive the report
  • Confirm