Calculating column widths in Excel Open XML

Commenter Roie said that the formula for calculating column widths in Excel Open XML should still be manageable. This is despite the scary formula mentioned before:

width = Truncate([{Number of Characters} * {Maximum Digit Width} + {5 pixel padding}] / {Maximum Digit Width} * 256) / 256

So I put on my explorer’s hat, and decided against my better judgment to delve into that inexplicable equation. Turns out, it wasn’t too bad. Here’s a screenshot of the resulting Excel file:

Excel Open XML calculate column widths

The code is almost the same as when we’re setting custom column widths, but here it is in its full glory:

static void Main(string[] args)
{
	string sFile = "ExcelOpenXmlCalculateColumnWidth.xlsx";
	if (File.Exists(sFile))
	{
		File.Delete(sFile);
	}
	BuildWorkbook(sFile);
}

private static void BuildWorkbook(string filename)
{
	try
	{
		using (SpreadsheetDocument xl = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
		{
			WorkbookPart wbp = xl.AddWorkbookPart();
			WorksheetPart wsp = wbp.AddNewPart<WorksheetPart>();
			Workbook wb = new Workbook();
			FileVersion fv = new FileVersion();
			fv.ApplicationName = "Microsoft Office Excel";
			Worksheet ws = new Worksheet();
			SheetData sd = new SheetData();

			WorkbookStylesPart wbsp = wbp.AddNewPart<WorkbookStylesPart>();
			wbsp.Stylesheet = CreateStylesheet();
			wbsp.Stylesheet.Save();

			string sILT = "Iced Lemon Tea Is An Awesome Drink!";
			double fSimpleWidth = 0.0f;
			double fWidthOfZero = 0.0f;
			double fDigitWidth = 0.0f;
			double fMaxDigitWidth = 0.0f;
			double fTruncWidth = 0.0f;

			System.Drawing.Font drawfont = new System.Drawing.Font("Calibri", 11);
			// I just need a Graphics object. Any reasonable bitmap size will do.
			Graphics g = Graphics.FromImage(new Bitmap(200,200));
			fWidthOfZero = (double)g.MeasureString("0", drawfont).Width;
			fSimpleWidth = (double)g.MeasureString(sILT, drawfont).Width;
			fSimpleWidth = fSimpleWidth / fWidthOfZero;

			for (int i = 0; i < 10; ++i)
			{
				fDigitWidth = (double)g.MeasureString(i.ToString(), drawfont).Width;
				if (fDigitWidth > fMaxDigitWidth)
				{
					fMaxDigitWidth = fDigitWidth;
				}
			}
			g.Dispose();
			
			// Truncate([{Number of Characters} * {Maximum Digit Width} + {5 pixel padding}] / {Maximum Digit Width} * 256) / 256
			fTruncWidth = Math.Truncate((sILT.ToCharArray().Count() * fMaxDigitWidth + 5.0) / fMaxDigitWidth * 256.0) / 256.0;

			Columns columns = new Columns();
			columns.Append(CreateColumnData(1, 1, fSimpleWidth));
			columns.Append(CreateColumnData(2, 2, fTruncWidth));
			columns.Append(CreateColumnData(3, 3, 35.42578125));
			ws.Append(columns);

			Row r;
			Cell c;

			r = new Row();
			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "A1";
			c.CellValue = new CellValue(sILT);
			r.Append(c);

			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "B1";
			c.CellValue = new CellValue(sILT);
			r.Append(c);

			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "C1";
			c.CellValue = new CellValue(sILT);
			r.Append(c);
			sd.Append(r);

			r = new Row();
			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "A2";
			c.CellValue = new CellValue(string.Format("Simple width: {0}", fSimpleWidth));
			r.Append(c);
			sd.Append(r);

			r = new Row();
			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "A3";
			c.CellValue = new CellValue(string.Format("Truncation width: {0}", fTruncWidth));
			r.Append(c);
			sd.Append(r);

			r = new Row();
			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "A4";
			c.CellValue = new CellValue(string.Format("Width of '0': {0}", fWidthOfZero));
			r.Append(c);
			sd.Append(r);

			r = new Row();
			c = new Cell();
			c.DataType = CellValues.String;
			c.CellReference = "A5";
			c.CellValue = new CellValue(string.Format("Max Width of Digits: {0}", fMaxDigitWidth));
			r.Append(c);
			sd.Append(r);

			ws.Append(sd);
			wsp.Worksheet = ws;
			wsp.Worksheet.Save();
			Sheets sheets = new Sheets();
			Sheet sheet = new Sheet();
			sheet.Name = "Sheet1";
			sheet.SheetId = 1;
			sheet.Id = wbp.GetIdOfPart(wsp);
			sheets.Append(sheet);
			wb.Append(fv);
			wb.Append(sheets);

			xl.WorkbookPart.Workbook = wb;
			xl.WorkbookPart.Workbook.Save();
			xl.Close();
		}
	}
	catch (Exception e)
	{
		Console.WriteLine(e.ToString());
		Console.ReadLine();
	}
}

private static Column CreateColumnData(UInt32 StartColumnIndex, UInt32 EndColumnIndex, double ColumnWidth)
{
	Column column;
	column = new Column();
	column.Min = StartColumnIndex;
	column.Max = EndColumnIndex;
	column.Width = ColumnWidth;
	column.CustomWidth = true;
	return column;
}

private static Stylesheet CreateStylesheet()
{
	Stylesheet ss = new Stylesheet();

	Fonts fts = new Fonts();
	DocumentFormat.OpenXml.Spreadsheet.Font ft = new DocumentFormat.OpenXml.Spreadsheet.Font();
	FontName ftn = new FontName();
	ftn.Val = "Calibri";
	FontSize ftsz = new FontSize();
	ftsz.Val = 11;
	ft.FontName = ftn;
	ft.FontSize = ftsz;
	fts.Append(ft);
	fts.Count = (uint)fts.ChildElements.Count;

	Fills fills = new Fills();
	Fill fill;
	PatternFill patternFill;
	fill = new Fill();
	patternFill = new PatternFill();
	patternFill.PatternType = PatternValues.None;
	fill.PatternFill = patternFill;
	fills.Append(fill);
	fill = new Fill();
	patternFill = new PatternFill();
	patternFill.PatternType = PatternValues.Gray125;
	fill.PatternFill = patternFill;
	fills.Append(fill);
	fills.Count = (uint)fills.ChildElements.Count;

	Borders borders = new Borders();
	Border border = new Border();
	border.LeftBorder = new LeftBorder();
	border.RightBorder = new RightBorder();
	border.TopBorder = new TopBorder();
	border.BottomBorder = new BottomBorder();
	border.DiagonalBorder = new DiagonalBorder();
	borders.Append(border);
	borders.Count = (uint)borders.ChildElements.Count;

	CellStyleFormats csfs = new CellStyleFormats();
	CellFormat cf = new CellFormat();
	cf.NumberFormatId = 0;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	csfs.Append(cf);
	csfs.Count = (uint)csfs.ChildElements.Count;

	uint iExcelIndex = 164;
	NumberFormats nfs = new NumberFormats();
	CellFormats cfs = new CellFormats();

	cf = new CellFormat();
	cf.NumberFormatId = 0;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	cf.FormatId = 0;
	cfs.Append(cf);

	NumberFormat nf;
	nf = new NumberFormat();
	nf.NumberFormatId = iExcelIndex++;
	nf.FormatCode = "dd/mm/yyyy hh:mm:ss";
	nfs.Append(nf);
	cf = new CellFormat();
	cf.NumberFormatId = nf.NumberFormatId;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	cf.FormatId = 0;
	cf.ApplyNumberFormat = true;
	cfs.Append(cf);

	nf = new NumberFormat();
	nf.NumberFormatId = iExcelIndex++;
	nf.FormatCode = "#,##0.0000";
	nfs.Append(nf);
	cf = new CellFormat();
	cf.NumberFormatId = nf.NumberFormatId;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	cf.FormatId = 0;
	cf.ApplyNumberFormat = true;
	cfs.Append(cf);

	// #,##0.00 is also Excel style index 4
	nf = new NumberFormat();
	nf.NumberFormatId = iExcelIndex++;
	nf.FormatCode = "#,##0.00";
	nfs.Append(nf);
	cf = new CellFormat();
	cf.NumberFormatId = nf.NumberFormatId;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	cf.FormatId = 0;
	cf.ApplyNumberFormat = true;
	cfs.Append(cf);

	// @ is also Excel style index 49
	nf = new NumberFormat();
	nf.NumberFormatId = iExcelIndex++;
	nf.FormatCode = "@";
	nfs.Append(nf);
	cf = new CellFormat();
	cf.NumberFormatId = nf.NumberFormatId;
	cf.FontId = 0;
	cf.FillId = 0;
	cf.BorderId = 0;
	cf.FormatId = 0;
	cf.ApplyNumberFormat = true;
	cfs.Append(cf);

	nfs.Count = (uint)nfs.ChildElements.Count;
	cfs.Count = (uint)cfs.ChildElements.Count;

	ss.Append(nfs);
	ss.Append(fts);
	ss.Append(fills);
	ss.Append(borders);
	ss.Append(csfs);
	ss.Append(cfs);

	CellStyles css = new CellStyles();
	CellStyle cs = new CellStyle();
	cs.Name = "Normal";
	cs.FormatId = 0;
	cs.BuiltinId = 0;
	css.Append(cs);
	css.Count = (uint)css.ChildElements.Count;
	ss.Append(css);

	DifferentialFormats dfs = new DifferentialFormats();
	dfs.Count = 0;
	ss.Append(dfs);

	TableStyles tss = new TableStyles();
	tss.Count = 0;
	tss.DefaultTableStyle = "TableStyleMedium9";
	tss.DefaultPivotStyle = "PivotStyleLight16";
	ss.Append(tss);

	return ss;
}

Note this new part:

string sILT = "Iced Lemon Tea Is An Awesome Drink!";
double fSimpleWidth = 0.0f;
double fWidthOfZero = 0.0f;
double fDigitWidth = 0.0f;
double fMaxDigitWidth = 0.0f;
double fTruncWidth = 0.0f;

System.Drawing.Font drawfont = new System.Drawing.Font("Calibri", 11);
// I just need a Graphics object. Any reasonable bitmap size will do.
Graphics g = Graphics.FromImage(new Bitmap(200,200));
fWidthOfZero = (double)g.MeasureString("0", drawfont).Width;
fSimpleWidth = (double)g.MeasureString(sILT, drawfont).Width;
fSimpleWidth = fSimpleWidth / fWidthOfZero;

for (int i = 0; i < 10; ++i)
{
	fDigitWidth = (double)g.MeasureString(i.ToString(), drawfont).Width;
	if (fDigitWidth > fMaxDigitWidth)
	{
		fMaxDigitWidth = fDigitWidth;
	}
}
g.Dispose();

// Truncate([{Number of Characters} * {Maximum Digit Width} + {5 pixel padding}] / {Maximum Digit Width} * 256) / 256
fTruncWidth = Math.Truncate((sILT.ToCharArray().Count() * fMaxDigitWidth + 5.0) / fMaxDigitWidth * 256.0) / 256.0;

I am using the MeasureString() function of the Graphics object to get the pixel width of a string when rendered in a particular font.

Roie gave a simple formula to calculate the column width, which is the pixel width of the string divided by the pixel of the zero character “0″. So let’s test that.

Then there’s that beast of a formula. We’ll need the number of characters in the string and the maximum pixel width of the digits. Turns out that the maximum pixel width of all digits is that of “0″. So Roie was right on that. Actually it’s logical, because “0″ spans the horizontal and vertical space. If you want to test for alphabets, then “M” and “W” are my guesses.

Anyway, I created the Excel file, then did the save as zip and opening the XML file trick. And found that 35.42578125 to be the column width to comfortably cover the string “Iced Lemon Tea Is An Awesome Drink!” rendered in Calibri font.

What do you know, 35.42578125 is also what was calculated using that unwieldy formula. So yeah, that wasn’t so bad.

Roie simplified that formula, and my only gripe is that rounding errors might add up when the string is long. Ah well, the only way you know if your theory works is if you test it. I’m paranoid about these things because my math background practically force me to look at calculation errors of any formula. You do not want a sin(x) = x error on your hands.

So there you have it. The custom column widths are calculable. Have fun with the code and the resulting Excel file.

There’s updated material and source code, together with more information on how to work with Open XML. Click here to find out more.

Comments

  1. Hi, Looking at this I didn’t think it should work because it is sizing treating all characters as the same size which they are not (Number of characters * Max Digit Width). Strangely it seems to work perfectly with your selected string, but if you try it with a string consisting entirely of lower case “l” characters you will see it does not auto size the column correctly at all.

    I think the Number of characters value is supposed to be the relative number of zero characters (which you already have as fSimpleWidth), if the average size of the characters you are using is the same as the size of the numeric digit then just the number of characters would equate to the same thing but if the average size is different it is wrong.

    I tried the calculation using fSimpleWidth instead of the number of characters but that was also wrong and I’m afraid I don’t know why, one other oddity is that the spec says “Using the Calibri font as an example, the maximum digit width of 11 point font size is 7 pixels (at 96 dpi)” so it is odd that your data shows width of zero in exactly that font to be 12.544…

    Apologies if I’m missing something here

  2. Vincent says:

    Hi Chris, if you used the simple width method, you’ll definitely hit problems with lower case l’s for example. That’s why it’s called the simple width method. It assumes that there’s a range of characters in the text such that the average width can be used.

    I modified my program and tried with 5 l’s, that is “lllll”. The simple width method failed to cover its column. But the method from the spec did its job. It calculated a width wide enough to cover “lllll” with a bit of buffer.

    Maybe there’s a way to calculate the exact width when the data string is rendered in the column. But I believe the method from the spec is used to cover the data string comfortably, not exactly.

    Remember, this is Excel. A user can easily adjust the column width.

  3. Hi Vincent

    I had to do this in the end because the xlsx files I am interested in are auto generated and should look nice as soon as they are opened so I looked into this a little further and found there are a couple of issues to accurately sizing columns in Excel.

    1. Need to use accurate character sizing, which means that instead of using MeasureString you need to use MeasureCharacterRanges, see http://groups.google.com/group/microsoft.public.office.developer.com.add_ins/browse_thread/thread/2fc33557feb72ab4/adaddc50480b8cff?lnk=raot

    2. Despite the spec saying to add 5 pixels (1 for border and 2 for each side margin) Excel seems to use 9 – 1 for the border, 5 for the leading space and 3 for the trailing space – I only found this by using the accessibility app. Magnifier and counting the pixels after using Excel to auto fit the columns

    Actually I was basing my calculations on underlying font metrics so I don’t actually use either MeasureCharacterRanges or MeasureString. If anyone is interested in doing this from font metrics then:

    Width=Truncate({DesiredWidth}+9/{MaxDigitWidth})/256
    {MaxDigitWidth} is an integer rounded to the nearest pixel of any of the 0..9 digits at 96 dpi
    {DesiredWidth} is the sum of adding all character widths together where each character width is the width of the character at 96 dpi rounded to the nearest integer. Note that each character is rounded not the overall sum

  4. Vincent says:

    Well, Chris, that looks awesome. A person with incentive to do something usually tries harder. I’d say you did a lot of research.

    The initial reason for my foray into this was because I had to create an Excel file with predictable column widths. Having automatic resizing column widths wasn’t necessary.

    Thanks for sharing your results.

Trackbacks

  1. [...] This post was mentioned on Twitter by Michael Kiselman, Vincent Tan. Vincent Tan said: I found out that, calculating "best fit" column widths isn't that hard after all… http://bit.ly/8d9vxU [...]