Treat pages as string (incomplete) and default to UTF8

This commit is contained in:
Nicolò P 2024-02-20 15:58:23 +01:00
parent 1d98c876c3
commit 5a57469f1c
2 changed files with 19 additions and 38 deletions

View File

@ -11,6 +11,8 @@ namespace Txt2Bib.Records
public abstract record ItemType public abstract record ItemType
{ {
public string Pages { get; set; } = "";
protected static bool IsDoi(string url) protected static bool IsDoi(string url)
{ {
return url.Contains("doi"); return url.Contains("doi");
@ -22,6 +24,15 @@ namespace Txt2Bib.Records
return s.Length > 1 ? return s.Length > 1 ?
$"{s[1]} {s[0][0]}{s[0][1..].ToLowerInvariant()}" : a; $"{s[1]} {s[0][0]}{s[0][1..].ToLowerInvariant()}" : a;
} }
/// <summary>
/// Normalizes a raw page specification for the generated <c>pages</c> field.
/// </summary>
/// <param name="pages">Raw page text taken from the entry, e.g. <c>"12-34."</c> or <c>"123"</c>.</param>
/// <returns>
/// An empty string when <paramref name="pages"/> is null or empty;
/// <c>"first--last"</c> when the text contains exactly one hyphen;
/// otherwise the text itself. A trailing period is removed in every case.
/// </returns>
protected string CreatePages(string pages)
{
    // Treat a missing value like an empty one instead of failing on pages.Length.
    if (string.IsNullOrEmpty(pages)) { return ""; }

    // Split once and reuse the parts rather than re-splitting for each access.
    string[] parts = pages.Split('-');

    // Exactly two parts means a simple "first-last" range, which is joined with "--".
    // Anything else (single page, text that already contains "--", more than one
    // hyphen) is passed through, minus the trailing period so both branches agree.
    return parts.Length == 2
        ? $"{parts[0]}--{parts[1].TrimEnd('.')}"
        : pages.TrimEnd('.');
}
} }
public record class Article : ItemType, IBib public record class Article : ItemType, IBib
@ -33,8 +44,6 @@ namespace Txt2Bib.Records
public ushort Year { get; set; } = 1950; public ushort Year { get; set; } = 1950;
public string Volume { get; set; } = ""; public string Volume { get; set; } = "";
public string? Issue { get; set; } = null; public string? Issue { get; set; } = null;
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Doi { get; set; } = ""; public string Doi { get; set; } = "";
public string Url { get; set; } = ""; public string Url { get; set; } = "";
@ -50,15 +59,7 @@ namespace Txt2Bib.Records
Volume = entryLines[5] != string.Empty ? checkVol(entryLines[5]) : Volume; Volume = entryLines[5] != string.Empty ? checkVol(entryLines[5]) : Volume;
Issue = entryLines[5].Split(',').Length == 2 ? Issue = entryLines[5].Split(',').Length == 2 ?
entryLines[5].Split(',')[1].Trim() : Issue; entryLines[5].Split(',')[1].Trim() : Issue;
try Pages = CreatePages(entryLines[6]);
{
FirstPage = ushort.Parse(entryLines[6].Split('-')[0]);
LastPage = ushort.Parse(entryLines[6].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato pagine non corretto");
}
if (entryLines.Length > 7 ) if (entryLines.Length > 7 )
{ {
@ -78,7 +79,7 @@ namespace Txt2Bib.Records
$"\tyear = \"{Year}\",\n" + $"\tyear = \"{Year}\",\n" +
$"\tvolume = \"{Volume}\",\n" + $"\tvolume = \"{Volume}\",\n" +
$"\tnumber = \"{Issue}\",\n" + $"\tnumber = \"{Issue}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" + $"\tpages = \"{Pages}\",\n" +
$"\tdoi = \"{Doi}\",\n" + $"\tdoi = \"{Doi}\",\n" +
$"\turl = \"{Url}\",\n" + $"\turl = \"{Url}\",\n" +
"}\n"; "}\n";
@ -164,8 +165,6 @@ namespace Txt2Bib.Records
public string Publisher { get; set; } = ""; public string Publisher { get; set; } = "";
public string Series { get; set; } = ""; public string Series { get; set; } = "";
public string Address { get; set; } = ""; public string Address { get; set; } = "";
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Url { get; set; } = ""; public string Url { get; set; } = "";
public string Doi { get; set; } = ""; public string Doi { get; set; } = "";
@ -181,15 +180,7 @@ namespace Txt2Bib.Records
Series = entryLines[6] != String.Empty ? entryLines[6] : Series; Series = entryLines[6] != String.Empty ? entryLines[6] : Series;
Address = entryLines[7] != String.Empty ? entryLines[7] : Address; Address = entryLines[7] != String.Empty ? entryLines[7] : Address;
Publisher = entryLines[8]; Publisher = entryLines[8];
try Pages = CreatePages(entryLines[9]);
{
FirstPage = ushort.Parse(entryLines[9].Split('-')[0]);
LastPage = ushort.Parse(entryLines[9].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato numeri di pagina errato...");
}
if (entryLines.Length > 10) if (entryLines.Length > 10)
{ {
@ -216,7 +207,7 @@ namespace Txt2Bib.Records
$"\tseries = {{{Series}}},\n" + $"\tseries = {{{Series}}},\n" +
$"\taddress = \"{Address}\",\n" + $"\taddress = \"{Address}\",\n" +
$"\tyear = \"{Year}\",\n" + $"\tyear = \"{Year}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" + $"\tpages = \"{Pages}\",\n" +
$"\turl = \"{Url}\",\n" + $"\turl = \"{Url}\",\n" +
$"\tdoi = \"{Doi}\",\n" + $"\tdoi = \"{Doi}\",\n" +
"}\n"; "}\n";
@ -234,8 +225,6 @@ namespace Txt2Bib.Records
public string Publisher { get; set; } = ""; public string Publisher { get; set; } = "";
public string Series { get; set; } = ""; public string Series { get; set; } = "";
public string Address { get; set; } = ""; public string Address { get; set; } = "";
public ushort FirstPage { get; set; } = 1;
public ushort LastPage { get; set; } = 1;
public string Url { get; set; } = ""; public string Url { get; set; } = "";
public string Doi { get; set; } = ""; public string Doi { get; set; } = "";
@ -249,15 +238,7 @@ namespace Txt2Bib.Records
Series = entryLines[6] != String.Empty ? entryLines[6] : Series; Series = entryLines[6] != String.Empty ? entryLines[6] : Series;
Address = entryLines[7] != String.Empty ? entryLines[7] : Address; Address = entryLines[7] != String.Empty ? entryLines[7] : Address;
Publisher = entryLines[8]; Publisher = entryLines[8];
try Pages = CreatePages(entryLines[9]);
{
FirstPage = ushort.Parse(entryLines[9].Split('-')[0]);
LastPage = ushort.Parse(entryLines[9].Split('-')[1].TrimEnd('.'));
}
catch (Exception)
{
throw new Exception("Formato numeri di pagina errato...");
}
if (entryLines.Length > 10) if (entryLines.Length > 10)
{ {
@ -283,7 +264,7 @@ namespace Txt2Bib.Records
$"\tseries = {{{Series}}},\n" + $"\tseries = {{{Series}}},\n" +
$"\taddress = \"{Address}\",\n" + $"\taddress = \"{Address}\",\n" +
$"\tyear = \"{Year}\",\n" + $"\tyear = \"{Year}\",\n" +
$"\tpages = \"{FirstPage}--{LastPage}\",\n" + $"\tpages = \"{Pages}\",\n" +
$"\turl = \"{Url}\",\n" + $"\turl = \"{Url}\",\n" +
$"\tdoi = \"{Doi}\",\n" + $"\tdoi = \"{Doi}\",\n" +
"}\n"; "}\n";

View File

@ -43,8 +43,8 @@ namespace Txt2Bib
{ {
using var reader = File.OpenText(path); using var reader = File.OpenText(path);
byte[] contentBytes = File.ReadAllBytes(path); byte[] contentBytes = File.ReadAllBytes(path);
// Use Windows 1252?? // Assumes UTF8!
var content = Encoding.Latin1.GetString(contentBytes); var content = Encoding.UTF8.GetString(contentBytes);
IEnumerable<string> entries = new List<string>(); IEnumerable<string> entries = new List<string>();
try try