Treat pages as string (incomplete) and default to UTF8

This commit is contained in:
Nicolò P 2024-02-20 15:58:23 +01:00
parent 1d98c876c3
commit 5a57469f1c
2 changed files with 19 additions and 38 deletions

View File

@ -11,6 +11,8 @@ namespace Txt2Bib.Records
public abstract record ItemType
{
public string Pages { get; set; } = "";
protected static bool IsDoi(string url)
{
return url.Contains("doi");
@ -22,6 +24,15 @@ namespace Txt2Bib.Records
return s.Length > 1 ?
$"{s[1]} {s[0][0]}{s[0][1..].ToLowerInvariant()}" : a;
}
/// <summary>
/// Normalizes a raw page specification into the form written to the
/// <c>pages</c> field of the generated entry.
/// </summary>
/// <param name="pages">
/// Raw pages text as read from the input, e.g. <c>"12-34."</c> or <c>"57."</c>.
/// </param>
/// <returns>
/// An empty string when <paramref name="pages"/> is null, empty or whitespace;
/// <c>"first--last"</c> when the text is a range with exactly one hyphen;
/// otherwise the text itself. In every case surrounding whitespace and any
/// trailing period are removed.
/// </returns>
protected string CreatePages(string pages)
{
    if (string.IsNullOrWhiteSpace(pages)) { return ""; }

    // Clean up once, before deciding whether this is a range, so that a single
    // page ("57.") loses its trailing period exactly like a range ("12-34.").
    string cleaned = pages.Trim().TrimEnd('.').TrimEnd();

    // Split a single time and reuse the parts instead of re-splitting per use.
    string[] bounds = cleaned.Split('-');

    // Only a plain "a-b" shape is rewritten; anything else (single page,
    // already "a--b", multi-hyphen identifiers) is passed through unchanged.
    return bounds.Length == 2
        ? $"{bounds[0].Trim()}--{bounds[1].Trim()}"
        : cleaned;
}
}
public record class Article : ItemType, IBib
@ -33,8 +44,6 @@ namespace Txt2Bib.Records
public ushort Year { get; set; } = 1950;
public string Volume { get; set; } = "";
public string? Issue { get; set; } = null;
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Doi { get; set; } = "";
public string Url { get; set; } = "";
@ -50,15 +59,7 @@ namespace Txt2Bib.Records
Volume = entryLines[5] != string.Empty ? checkVol(entryLines[5]) : Volume;
Issue = entryLines[5].Split(',').Length == 2 ?
entryLines[5].Split(',')[1].Trim() : Issue;
try
{
FirstPage = ushort.Parse(entryLines[6].Split('-')[0]);
LastPage = ushort.Parse(entryLines[6].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato pagine non corretto");
}
Pages = CreatePages(entryLines[6]);
if (entryLines.Length > 7 )
{
@ -78,7 +79,7 @@ namespace Txt2Bib.Records
$"\tyear = \"{Year}\",\n" +
$"\tvolume = \"{Volume}\",\n" +
$"\tnumber = \"{Issue}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" +
$"\tpages = \"{Pages}\",\n" +
$"\tdoi = \"{Doi}\",\n" +
$"\turl = \"{Url}\",\n" +
"}\n";
@ -164,8 +165,6 @@ namespace Txt2Bib.Records
public string Publisher { get; set; } = "";
public string Series { get; set; } = "";
public string Address { get; set; } = "";
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Url { get; set; } = "";
public string Doi { get; set; } = "";
@ -181,15 +180,7 @@ namespace Txt2Bib.Records
Series = entryLines[6] != String.Empty ? entryLines[6] : Series;
Address = entryLines[7] != String.Empty ? entryLines[7] : Address;
Publisher = entryLines[8];
try
{
FirstPage = ushort.Parse(entryLines[9].Split('-')[0]);
LastPage = ushort.Parse(entryLines[9].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato numeri di pagina errato...");
}
Pages = CreatePages(entryLines[9]);
if (entryLines.Length > 10)
{
@ -216,7 +207,7 @@ namespace Txt2Bib.Records
$"\tseries = {{{Series}}},\n" +
$"\taddress = \"{Address}\",\n" +
$"\tyear = \"{Year}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" +
$"\tpages = \"{Pages}\",\n" +
$"\turl = \"{Url}\",\n" +
$"\tdoi = \"{Doi}\",\n" +
"}\n";
@ -234,8 +225,6 @@ namespace Txt2Bib.Records
public string Publisher { get; set; } = "";
public string Series { get; set; } = "";
public string Address { get; set; } = "";
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Url { get; set; } = "";
public string Doi { get; set; } = "";
@ -249,15 +238,7 @@ namespace Txt2Bib.Records
Series = entryLines[6] != String.Empty ? entryLines[6] : Series;
Address = entryLines[7] != String.Empty ? entryLines[7] : Address;
Publisher = entryLines[8];
try
{
FirstPage = ushort.Parse(entryLines[9].Split('-')[0]);
LastPage = ushort.Parse(entryLines[9].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato numeri di pagina errato...");
}
Pages = CreatePages(entryLines[9]);
if (entryLines.Length > 10)
{
@ -283,7 +264,7 @@ namespace Txt2Bib.Records
$"\tseries = {{{Series}}},\n" +
$"\taddress = \"{Address}\",\n" +
$"\tyear = \"{Year}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" +
$"\tpages = \"{Pages}\",\n" +
$"\turl = \"{Url}\",\n" +
$"\tdoi = \"{Doi}\",\n" +
"}\n";

View File

@ -43,8 +43,8 @@ namespace Txt2Bib
{
using var reader = File.OpenText(path);
byte[] contentBytes = File.ReadAllBytes(path);
// Use Windows 1252??
var content = Encoding.Latin1.GetString(contentBytes);
// Assumes UTF8!
var content = Encoding.UTF8.GetString(contentBytes);
IEnumerable<string> entries = new List<string>();
try